From c0981da47d5696fe36474fcf86b4ce03ae3ff818 Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Fri, 19 Nov 2021 21:06:13 +0100 Subject: Vendor import of llvm-project main llvmorg-14-init-10186-gff7f2cfa959b. --- llvm/include/llvm-c/Comdat.h | 11 + llvm/include/llvm-c/Core.h | 16 +- llvm/include/llvm-c/DebugInfo.h | 82 +- llvm/include/llvm-c/DisassemblerTypes.h | 10 + llvm/include/llvm-c/Error.h | 11 + llvm/include/llvm-c/ErrorHandling.h | 10 + llvm/include/llvm-c/IRReader.h | 11 + llvm/include/llvm-c/LLJIT.h | 11 + llvm/include/llvm-c/Linker.h | 11 + llvm/include/llvm-c/Orc.h | 54 + llvm/include/llvm-c/OrcEE.h | 11 + llvm/include/llvm-c/Support.h | 10 + llvm/include/llvm-c/TargetMachine.h | 10 + llvm/include/llvm-c/Transforms/PassBuilder.h | 13 +- llvm/include/llvm-c/lto.h | 12 +- llvm/include/llvm/ADT/APFloat.h | 4 +- llvm/include/llvm/ADT/APInt.h | 935 ++++++----- llvm/include/llvm/ADT/APSInt.h | 10 +- llvm/include/llvm/ADT/ArrayRef.h | 4 +- llvm/include/llvm/ADT/BitVector.h | 24 +- llvm/include/llvm/ADT/CombinationGenerator.h | 148 ++ llvm/include/llvm/ADT/DenseMapInfo.h | 16 +- llvm/include/llvm/ADT/EquivalenceClasses.h | 33 +- llvm/include/llvm/ADT/FunctionExtras.h | 16 +- llvm/include/llvm/ADT/Hashing.h | 8 + llvm/include/llvm/ADT/ImmutableList.h | 3 +- llvm/include/llvm/ADT/IntervalMap.h | 2 +- llvm/include/llvm/ADT/MapVector.h | 1 + llvm/include/llvm/ADT/PointerIntPair.h | 4 +- llvm/include/llvm/ADT/PointerUnion.h | 31 +- llvm/include/llvm/ADT/STLExtras.h | 171 +- llvm/include/llvm/ADT/Sequence.h | 164 +- llvm/include/llvm/ADT/SetOperations.h | 9 - llvm/include/llvm/ADT/SmallBitVector.h | 51 +- llvm/include/llvm/ADT/SmallVector.h | 15 +- llvm/include/llvm/ADT/StringExtras.h | 137 +- llvm/include/llvm/ADT/StringMap.h | 27 +- llvm/include/llvm/ADT/StringRef.h | 3 +- llvm/include/llvm/ADT/Triple.h | 158 +- llvm/include/llvm/ADT/TypeSwitch.h | 7 +- llvm/include/llvm/ADT/iterator.h | 49 +- llvm/include/llvm/Analysis/AliasAnalysis.h | 69 +- llvm/include/llvm/Analysis/AssumeBundleQueries.h | 9 +- llvm/include/llvm/Analysis/AssumptionCache.h | 10 +- llvm/include/llvm/Analysis/BasicAliasAnalysis.h | 82 +- llvm/include/llvm/Analysis/CGSCCPassManager.h | 101 +- llvm/include/llvm/Analysis/CaptureTracking.h | 25 +- llvm/include/llvm/Analysis/ConstantFolding.h | 30 +- llvm/include/llvm/Analysis/CostModel.h | 26 + llvm/include/llvm/Analysis/Delinearization.h | 105 ++ llvm/include/llvm/Analysis/HeatUtils.h | 7 +- .../include/llvm/Analysis/IRSimilarityIdentifier.h | 250 ++- llvm/include/llvm/Analysis/IVDescriptors.h | 88 +- llvm/include/llvm/Analysis/IVUsers.h | 3 - llvm/include/llvm/Analysis/InlineAdvisor.h | 38 +- llvm/include/llvm/Analysis/InlineCost.h | 3 + llvm/include/llvm/Analysis/InlineOrder.h | 172 ++ llvm/include/llvm/Analysis/InstructionSimplify.h | 2 +- llvm/include/llvm/Analysis/LazyCallGraph.h | 65 +- llvm/include/llvm/Analysis/LoopAccessAnalysis.h | 26 +- llvm/include/llvm/Analysis/LoopAnalysisManager.h | 1 + llvm/include/llvm/Analysis/LoopInfo.h | 17 +- llvm/include/llvm/Analysis/LoopInfoImpl.h | 5 +- llvm/include/llvm/Analysis/LoopNestAnalysis.h | 22 +- llvm/include/llvm/Analysis/MLInlineAdvisor.h | 2 + llvm/include/llvm/Analysis/MemorySSA.h | 30 +- llvm/include/llvm/Analysis/ObjCARCAnalysisUtils.h | 28 +- llvm/include/llvm/Analysis/ObjCARCUtil.h | 45 +- llvm/include/llvm/Analysis/ProfileSummaryInfo.h | 12 +- llvm/include/llvm/Analysis/ReplayInlineAdvisor.h | 53 +- llvm/include/llvm/Analysis/ScalarEvolution.h | 282 ++-- llvm/include/llvm/Analysis/StackLifetime.h | 2 + 
llvm/include/llvm/Analysis/StackSafetyAnalysis.h | 8 + llvm/include/llvm/Analysis/TargetLibraryInfo.h | 6 +- llvm/include/llvm/Analysis/TargetTransformInfo.h | 83 +- .../llvm/Analysis/TargetTransformInfoImpl.h | 131 +- llvm/include/llvm/Analysis/TypeMetadataUtils.h | 28 +- llvm/include/llvm/Analysis/Utils/TFUtils.h | 7 +- llvm/include/llvm/Analysis/ValueTracking.h | 36 +- llvm/include/llvm/Analysis/VectorUtils.h | 12 +- llvm/include/llvm/AsmParser/LLLexer.h | 4 +- llvm/include/llvm/AsmParser/LLParser.h | 27 +- llvm/include/llvm/AsmParser/LLToken.h | 8 +- llvm/include/llvm/BinaryFormat/Dwarf.def | 3 + llvm/include/llvm/BinaryFormat/DynamicTags.def | 12 + llvm/include/llvm/BinaryFormat/ELF.h | 23 + llvm/include/llvm/BinaryFormat/ELFRelocs/RISCV.def | 4 - llvm/include/llvm/BinaryFormat/MachO.def | 2 + llvm/include/llvm/BinaryFormat/Wasm.h | 41 +- llvm/include/llvm/BinaryFormat/WasmTraits.h | 18 +- llvm/include/llvm/BinaryFormat/XCOFF.h | 16 + llvm/include/llvm/Bitcode/BitcodeAnalyzer.h | 2 + llvm/include/llvm/Bitcode/BitcodeCommon.h | 8 +- llvm/include/llvm/Bitcode/LLVMBitCodes.h | 1 + llvm/include/llvm/CodeGen/Analysis.h | 5 +- llvm/include/llvm/CodeGen/AsmPrinter.h | 7 +- llvm/include/llvm/CodeGen/BasicTTIImpl.h | 264 ++-- llvm/include/llvm/CodeGen/CodeGenCommonISel.h | 219 +++ llvm/include/llvm/CodeGen/CommandFlags.h | 7 +- llvm/include/llvm/CodeGen/FunctionLoweringInfo.h | 1 - .../include/llvm/CodeGen/GlobalISel/CallLowering.h | 14 +- .../llvm/CodeGen/GlobalISel/CombinerHelper.h | 131 +- .../llvm/CodeGen/GlobalISel/GenericMachineInstrs.h | 37 +- .../include/llvm/CodeGen/GlobalISel/IRTranslator.h | 38 +- .../llvm/CodeGen/GlobalISel/LegacyLegalizerInfo.h | 2 +- .../GlobalISel/LegalizationArtifactCombiner.h | 184 ++- .../llvm/CodeGen/GlobalISel/LegalizerHelper.h | 17 + .../llvm/CodeGen/GlobalISel/LegalizerInfo.h | 29 +- .../include/llvm/CodeGen/GlobalISel/LoadStoreOpt.h | 165 ++ .../llvm/CodeGen/GlobalISel/MIPatternMatch.h | 49 +- .../llvm/CodeGen/GlobalISel/MachineIRBuilder.h | 8 + llvm/include/llvm/CodeGen/GlobalISel/Utils.h | 100 +- llvm/include/llvm/CodeGen/ISDOpcodes.h | 11 + llvm/include/llvm/CodeGen/IndirectThunks.h | 2 +- .../llvm/CodeGen/LinkAllAsmWriterComponents.h | 3 + .../llvm/CodeGen/LinkAllCodegenComponents.h | 3 + llvm/include/llvm/CodeGen/LiveInterval.h | 10 +- llvm/include/llvm/CodeGen/LiveIntervalUnion.h | 29 +- llvm/include/llvm/CodeGen/LiveVariables.h | 6 + llvm/include/llvm/CodeGen/LowLevelType.h | 3 +- llvm/include/llvm/CodeGen/MIRFSDiscriminator.h | 4 + llvm/include/llvm/CodeGen/MIRFormatter.h | 7 +- llvm/include/llvm/CodeGen/MIRSampleProfile.h | 76 + llvm/include/llvm/CodeGen/MIRYamlMapping.h | 2 + llvm/include/llvm/CodeGen/MachineCombinerPattern.h | 13 +- llvm/include/llvm/CodeGen/MachineDominators.h | 16 +- llvm/include/llvm/CodeGen/MachineFrameInfo.h | 2 + llvm/include/llvm/CodeGen/MachineFunction.h | 16 +- llvm/include/llvm/CodeGen/MachineInstr.h | 14 +- llvm/include/llvm/CodeGen/MachineMemOperand.h | 12 +- .../CodeGen/MachineOptimizationRemarkEmitter.h | 6 + llvm/include/llvm/CodeGen/MachineRegisterInfo.h | 41 +- llvm/include/llvm/CodeGen/MacroFusion.h | 14 + llvm/include/llvm/CodeGen/Passes.h | 12 + llvm/include/llvm/CodeGen/RegAllocCommon.h | 7 +- llvm/include/llvm/CodeGen/RegisterScavenging.h | 3 - llvm/include/llvm/CodeGen/SelectionDAG.h | 91 +- .../llvm/CodeGen/SelectionDAGAddressAnalysis.h | 1 + llvm/include/llvm/CodeGen/SelectionDAGNodes.h | 238 ++- llvm/include/llvm/CodeGen/SwitchLoweringUtils.h | 8 +- llvm/include/llvm/CodeGen/TargetCallingConv.h | 8 +- 
llvm/include/llvm/CodeGen/TargetInstrInfo.h | 33 +- llvm/include/llvm/CodeGen/TargetLowering.h | 84 +- llvm/include/llvm/CodeGen/TargetPassConfig.h | 14 +- llvm/include/llvm/CodeGen/TargetRegisterInfo.h | 6 +- llvm/include/llvm/CodeGen/TargetSchedule.h | 1 - llvm/include/llvm/CodeGen/ValueTypes.td | 3 +- llvm/include/llvm/DWARFLinker/DWARFLinker.h | 24 +- llvm/include/llvm/DebugInfo/CodeView/CVRecord.h | 1 - .../llvm/DebugInfo/CodeView/CodeViewRegisters.def | 2 +- llvm/include/llvm/DebugInfo/CodeView/TypeIndex.h | 1 + .../DebugInfo/DWARF/DWARFAbbreviationDeclaration.h | 21 + .../llvm/DebugInfo/DWARF/DWARFAddressRange.h | 6 +- llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h | 28 +- .../llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h | 32 +- llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h | 4 - .../llvm/DebugInfo/DWARF/DWARFDebugRangeList.h | 7 +- llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h | 14 +- .../include/llvm/DebugInfo/DWARF/DWARFExpression.h | 40 +- llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h | 17 +- llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h | 30 +- llvm/include/llvm/DebugInfo/GSYM/StringTable.h | 1 - llvm/include/llvm/DebugInfo/MSF/MSFCommon.h | 3 + .../include/llvm/DebugInfo/MSF/MappedBlockStream.h | 20 +- .../llvm/DebugInfo/PDB/Native/DbiModuleList.h | 4 +- llvm/include/llvm/DebugInfo/PDB/Native/HashTable.h | 5 +- .../llvm/DebugInfo/PDB/Native/NamedStreamMap.h | 1 - .../llvm/DebugInfo/PDB/Native/NativeLineNumber.h | 1 - .../DebugInfo/PDB/Native/NativeTypeFunctionSig.h | 1 - .../llvm/DebugInfo/PDB/Native/NativeTypeVTShape.h | 1 - .../llvm/DebugInfo/PDB/Native/PDBFileBuilder.h | 1 - llvm/include/llvm/Demangle/Demangle.h | 14 +- llvm/include/llvm/Demangle/ItaniumDemangle.h | 1315 ++++++++-------- .../include/llvm/Demangle/MicrosoftDemangleNodes.h | 93 +- llvm/include/llvm/Demangle/Utility.h | 51 +- .../include/llvm/ExecutionEngine/ExecutionEngine.h | 1 - .../llvm/ExecutionEngine/JITLink/ELF_aarch64.h | 39 + .../llvm/ExecutionEngine/JITLink/ELF_riscv.h | 2 +- .../llvm/ExecutionEngine/JITLink/ELF_x86_64.h | 20 +- .../include/llvm/ExecutionEngine/JITLink/JITLink.h | 98 +- .../ExecutionEngine/JITLink/JITLinkMemoryManager.h | 420 ++++- .../llvm/ExecutionEngine/JITLink/MachO_arm64.h | 2 + .../llvm/ExecutionEngine/JITLink/MemoryFlags.h | 225 +++ .../llvm/ExecutionEngine/JITLink/TableManager.h | 63 + .../include/llvm/ExecutionEngine/JITLink/aarch64.h | 38 + llvm/include/llvm/ExecutionEngine/JITLink/riscv.h | 14 +- llvm/include/llvm/ExecutionEngine/JITLink/x86_64.h | 275 +++- llvm/include/llvm/ExecutionEngine/MCJIT.h | 3 + llvm/include/llvm/ExecutionEngine/Orc/Core.h | 100 +- .../ExecutionEngine/Orc/DebuggerSupportPlugin.h | 64 + .../llvm/ExecutionEngine/Orc/ELFNixPlatform.h | 330 ++++ .../ExecutionEngine/Orc/EPCDebugObjectRegistrar.h | 9 +- .../llvm/ExecutionEngine/Orc/EPCEHFrameRegistrar.h | 9 +- .../ExecutionEngine/Orc/EPCGenericDylibManager.h | 67 + .../Orc/EPCGenericJITLinkMemoryManager.h | 97 ++ .../ExecutionEngine/Orc/EPCGenericMemoryAccess.h | 85 + .../Orc/EPCGenericRTDyldMemoryManager.h | 133 ++ .../llvm/ExecutionEngine/Orc/EPCIndirectionUtils.h | 6 +- .../ExecutionEngine/Orc/ExecutorProcessControl.h | 272 +++- .../llvm/ExecutionEngine/Orc/IndirectionUtils.h | 34 + .../llvm/ExecutionEngine/Orc/LLVMSPSSerializers.h | 69 - .../ExecutionEngine/Orc/LookupAndRecordAddrs.h | 70 + .../llvm/ExecutionEngine/Orc/MachOPlatform.h | 88 +- .../llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h | 6 +- .../Orc/OrcRPCExecutorProcessControl.h | 436 ----- 
.../ExecutionEngine/Orc/OrcRemoteTargetClient.h | 925 ----------- .../ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h | 386 ----- .../ExecutionEngine/Orc/OrcRemoteTargetServer.h | 464 ------ .../ExecutionEngine/Orc/Shared/ExecutorAddress.h | 138 +- .../ExecutionEngine/Orc/Shared/FDRawByteChannel.h | 79 - .../llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h | 68 + .../llvm/ExecutionEngine/Orc/Shared/RPCUtils.h | 1659 -------------------- .../ExecutionEngine/Orc/Shared/RawByteChannel.h | 183 --- .../ExecutionEngine/Orc/Shared/Serialization.h | 769 --------- .../Orc/Shared/SimplePackedSerialization.h | 124 +- .../Orc/Shared/SimpleRemoteEPCUtils.h | 235 +++ .../Orc/Shared/TargetProcessControlTypes.h | 286 +++- .../Orc/Shared/WrapperFunctionUtils.h | 124 +- .../llvm/ExecutionEngine/Orc/SimpleRemoteEPC.h | 140 ++ .../Orc/TargetProcess/ExecutorBootstrapService.h | 36 + .../Orc/TargetProcess/JITLoaderGDB.h | 2 +- .../Orc/TargetProcess/OrcRPCTPCServer.h | 660 -------- .../Orc/TargetProcess/RegisterEHFrames.h | 20 +- .../Orc/TargetProcess/SimpleExecutorDylibManager.h | 64 + .../TargetProcess/SimpleExecutorMemoryManager.h | 70 + .../Orc/TargetProcess/SimpleRemoteEPCServer.h | 182 +++ .../llvm/ExecutionEngine/Orc/TaskDispatch.h | 131 ++ .../llvm/ExecutionEngine/OrcMCJITReplacement.h | 37 - .../llvm/ExecutionEngine/OrcV1Deprecation.h | 22 - llvm/include/llvm/ExecutionEngine/RuntimeDyld.h | 14 + llvm/include/llvm/Frontend/OpenMP/OMP.td | 69 +- llvm/include/llvm/Frontend/OpenMP/OMPConstants.h | 8 + llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h | 117 +- llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h | 430 ++++- llvm/include/llvm/Frontend/OpenMP/OMPKinds.def | 63 +- llvm/include/llvm/IR/AbstractCallSite.h | 2 +- llvm/include/llvm/IR/Argument.h | 2 +- llvm/include/llvm/IR/Assumptions.h | 22 +- llvm/include/llvm/IR/Attributes.h | 371 +++-- llvm/include/llvm/IR/Attributes.td | 3 + llvm/include/llvm/IR/BasicBlock.h | 12 +- llvm/include/llvm/IR/Constant.h | 6 + llvm/include/llvm/IR/ConstantRange.h | 40 + llvm/include/llvm/IR/Constants.h | 15 +- llvm/include/llvm/IR/DIBuilder.h | 80 +- llvm/include/llvm/IR/DataLayout.h | 23 +- llvm/include/llvm/IR/DebugInfo.h | 2 - llvm/include/llvm/IR/DebugInfoMetadata.h | 437 +++--- llvm/include/llvm/IR/DerivedTypes.h | 7 +- llvm/include/llvm/IR/DiagnosticInfo.h | 36 +- llvm/include/llvm/IR/DiagnosticPrinter.h | 2 +- llvm/include/llvm/IR/Dominators.h | 6 + llvm/include/llvm/IR/FPEnv.h | 14 +- llvm/include/llvm/IR/Function.h | 252 ++- llvm/include/llvm/IR/GCStrategy.h | 3 + llvm/include/llvm/IR/GlobalAlias.h | 33 +- llvm/include/llvm/IR/GlobalIFunc.h | 44 +- llvm/include/llvm/IR/GlobalIndirectSymbol.h | 93 -- llvm/include/llvm/IR/GlobalObject.h | 7 +- llvm/include/llvm/IR/GlobalValue.h | 14 +- llvm/include/llvm/IR/IRBuilder.h | 32 +- llvm/include/llvm/IR/InstrTypes.h | 292 ++-- llvm/include/llvm/IR/Instruction.h | 18 +- llvm/include/llvm/IR/Instructions.h | 105 +- llvm/include/llvm/IR/IntrinsicInst.h | 22 + llvm/include/llvm/IR/Intrinsics.h | 3 +- llvm/include/llvm/IR/Intrinsics.td | 158 +- llvm/include/llvm/IR/IntrinsicsAArch64.td | 85 +- llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 52 +- llvm/include/llvm/IR/IntrinsicsBPF.td | 3 + llvm/include/llvm/IR/IntrinsicsNVVM.td | 778 ++++----- llvm/include/llvm/IR/IntrinsicsPowerPC.td | 60 +- llvm/include/llvm/IR/IntrinsicsRISCV.td | 177 ++- llvm/include/llvm/IR/IntrinsicsSystemZ.td | 8 +- llvm/include/llvm/IR/IntrinsicsWebAssembly.td | 70 +- llvm/include/llvm/IR/IntrinsicsX86.td | 762 ++++++++- llvm/include/llvm/IR/LLVMContext.h | 4 + 
llvm/include/llvm/IR/MatrixBuilder.h | 32 +- llvm/include/llvm/IR/Metadata.h | 35 + llvm/include/llvm/IR/Module.h | 9 +- llvm/include/llvm/IR/ModuleSummaryIndex.h | 50 + llvm/include/llvm/IR/Operator.h | 7 + llvm/include/llvm/IR/OptBisect.h | 26 +- llvm/include/llvm/IR/PassManager.h | 81 +- llvm/include/llvm/IR/PassManagerInternal.h | 9 + llvm/include/llvm/IR/PatternMatch.h | 131 +- llvm/include/llvm/IR/ProfileSummary.h | 38 +- llvm/include/llvm/IR/PseudoProbe.h | 4 - llvm/include/llvm/IR/ReplaceConstant.h | 4 - llvm/include/llvm/IR/RuntimeLibcalls.def | 4 + llvm/include/llvm/IR/Type.h | 38 +- llvm/include/llvm/IR/VPIntrinsics.def | 141 +- llvm/include/llvm/IR/Value.h | 35 +- llvm/include/llvm/InitializePasses.h | 5 +- llvm/include/llvm/InterfaceStub/IFSHandler.h | 3 + llvm/include/llvm/LTO/Caching.h | 38 - llvm/include/llvm/LTO/Config.h | 3 + llvm/include/llvm/LTO/LTO.h | 47 +- llvm/include/llvm/LTO/SummaryBasedOptimizations.h | 2 + llvm/include/llvm/LTO/legacy/LTOCodeGenerator.h | 2 +- llvm/include/llvm/LTO/legacy/LTOModule.h | 4 + llvm/include/llvm/LinkAllIR.h | 3 + llvm/include/llvm/LinkAllPasses.h | 3 + llvm/include/llvm/MC/MCAsmBackend.h | 10 +- llvm/include/llvm/MC/MCAsmInfoGOFF.h | 29 + llvm/include/llvm/MC/MCContext.h | 2 +- llvm/include/llvm/MC/MCDwarf.h | 38 +- llvm/include/llvm/MC/MCELFObjectWriter.h | 2 + llvm/include/llvm/MC/MCELFStreamer.h | 2 +- llvm/include/llvm/MC/MCExpr.h | 2 + llvm/include/llvm/MC/MCFragment.h | 31 +- llvm/include/llvm/MC/MCInstrAnalysis.h | 11 +- llvm/include/llvm/MC/MCInstrDesc.h | 4 +- llvm/include/llvm/MC/MCObjectFileInfo.h | 4 + llvm/include/llvm/MC/MCObjectStreamer.h | 6 +- llvm/include/llvm/MC/MCPseudoProbe.h | 292 +++- llvm/include/llvm/MC/MCRegister.h | 1 + llvm/include/llvm/MC/MCSchedule.h | 1 - llvm/include/llvm/MC/MCStreamer.h | 10 +- llvm/include/llvm/MC/MCSymbolWasm.h | 12 +- llvm/include/llvm/MC/MCWasmStreamer.h | 5 + llvm/include/llvm/MC/MCWinCOFFStreamer.h | 2 +- llvm/include/llvm/MC/TargetRegistry.h | 1373 ++++++++++++++++ llvm/include/llvm/MCA/CustomBehaviour.h | 51 +- llvm/include/llvm/MCA/Instruction.h | 4 +- llvm/include/llvm/MCA/Stages/InOrderIssueStage.h | 5 +- llvm/include/llvm/MCA/View.h | 41 + llvm/include/llvm/Object/ELF.h | 7 +- llvm/include/llvm/Object/ELFObjectFile.h | 17 +- llvm/include/llvm/Object/ELFTypes.h | 8 +- llvm/include/llvm/Object/Error.h | 4 + llvm/include/llvm/Object/MachO.h | 3 + llvm/include/llvm/Object/Wasm.h | 9 +- llvm/include/llvm/Object/XCOFFObjectFile.h | 153 +- llvm/include/llvm/ObjectYAML/MachOYAML.h | 1 + llvm/include/llvm/ObjectYAML/WasmYAML.h | 38 +- llvm/include/llvm/ObjectYAML/XCOFFYAML.h | 54 +- llvm/include/llvm/Option/Arg.h | 5 +- llvm/include/llvm/Option/OptParser.td | 2 +- llvm/include/llvm/Option/OptTable.h | 13 +- llvm/include/llvm/Option/Option.h | 14 +- llvm/include/llvm/Passes/OptimizationLevel.h | 127 ++ llvm/include/llvm/Passes/PassBuilder.h | 178 +-- .../include/llvm/Passes/StandardInstrumentations.h | 217 ++- .../llvm/ProfileData/Coverage/CoverageMapping.h | 7 +- llvm/include/llvm/ProfileData/InstrProf.h | 18 +- llvm/include/llvm/ProfileData/InstrProfData.inc | 11 +- llvm/include/llvm/ProfileData/InstrProfReader.h | 18 +- llvm/include/llvm/ProfileData/ProfileCommon.h | 10 +- llvm/include/llvm/ProfileData/SampleProf.h | 376 +++-- llvm/include/llvm/ProfileData/SampleProfReader.h | 49 +- llvm/include/llvm/ProfileData/SampleProfWriter.h | 74 +- llvm/include/llvm/Support/AArch64TargetParser.def | 36 + llvm/include/llvm/Support/ARMTargetParser.def | 18 + 
llvm/include/llvm/Support/Allocator.h | 2 +- llvm/include/llvm/Support/AtomicOrdering.h | 10 + llvm/include/llvm/Support/BinaryByteStream.h | 34 +- llvm/include/llvm/Support/BinaryItemStream.h | 14 +- llvm/include/llvm/Support/BinaryStream.h | 12 +- llvm/include/llvm/Support/BinaryStreamArray.h | 7 +- llvm/include/llvm/Support/BinaryStreamReader.h | 14 +- llvm/include/llvm/Support/BinaryStreamRef.h | 71 +- llvm/include/llvm/Support/BinaryStreamWriter.h | 14 +- llvm/include/llvm/Support/Caching.h | 71 + llvm/include/llvm/Support/CommandLine.h | 64 +- llvm/include/llvm/Support/Compiler.h | 60 +- llvm/include/llvm/Support/CrashRecoveryContext.h | 3 +- llvm/include/llvm/Support/DOTGraphTraits.h | 5 + llvm/include/llvm/Support/DataExtractor.h | 3 + llvm/include/llvm/Support/Debug.h | 21 - llvm/include/llvm/Support/DivisionByConstantInfo.h | 38 + llvm/include/llvm/Support/Error.h | 37 +- llvm/include/llvm/Support/ErrorHandling.h | 26 +- llvm/include/llvm/Support/ExtensibleRTTI.h | 7 +- llvm/include/llvm/Support/FileSystem.h | 8 +- llvm/include/llvm/Support/FileSystem/UniqueID.h | 27 + llvm/include/llvm/Support/FormatVariadic.h | 2 +- .../llvm/Support/GenericDomTreeConstruction.h | 4 +- llvm/include/llvm/Support/GraphWriter.h | 91 +- llvm/include/llvm/Support/HashBuilder.h | 438 ++++++ llvm/include/llvm/Support/JSON.h | 46 +- llvm/include/llvm/Support/KnownBits.h | 21 +- llvm/include/llvm/Support/MD5.h | 37 +- llvm/include/llvm/Support/MSP430AttributeParser.h | 44 + llvm/include/llvm/Support/MSP430Attributes.h | 44 + llvm/include/llvm/Support/MachineValueType.h | 50 +- llvm/include/llvm/Support/Memory.h | 13 +- llvm/include/llvm/Support/PGOOptions.h | 65 + llvm/include/llvm/Support/Parallel.h | 5 +- llvm/include/llvm/Support/Path.h | 67 +- llvm/include/llvm/Support/Process.h | 6 +- llvm/include/llvm/Support/RISCVISAInfo.h | 89 ++ llvm/include/llvm/Support/RISCVTargetParser.def | 10 +- llvm/include/llvm/Support/Signposts.h | 43 +- llvm/include/llvm/Support/TargetOpcodes.def | 3 + llvm/include/llvm/Support/TargetRegistry.h | 1297 --------------- llvm/include/llvm/Support/TargetSelect.h | 12 + llvm/include/llvm/Support/TypeSize.h | 8 +- llvm/include/llvm/Support/VersionTuple.h | 7 + llvm/include/llvm/Support/VirtualFileSystem.h | 35 +- llvm/include/llvm/Support/Windows/WindowsSupport.h | 4 +- .../llvm/Support/X86DisassemblerDecoderCommon.h | 8 +- llvm/include/llvm/Support/X86TargetParser.def | 135 +- llvm/include/llvm/Support/X86TargetParser.h | 4 + llvm/include/llvm/Support/YAMLTraits.h | 2 +- llvm/include/llvm/Support/raw_ostream.h | 8 +- llvm/include/llvm/TableGen/DirectiveEmitter.h | 2 +- llvm/include/llvm/TableGen/Error.h | 22 +- llvm/include/llvm/TableGen/Record.h | 51 +- llvm/include/llvm/Target/GenericOpcodes.td | 12 + llvm/include/llvm/Target/GlobalISel/Combine.td | 101 +- .../llvm/Target/GlobalISel/SelectionDAGCompat.td | 2 + llvm/include/llvm/Target/Target.td | 22 + .../include/llvm/Target/TargetLoweringObjectFile.h | 9 + llvm/include/llvm/Target/TargetMachine.h | 25 + llvm/include/llvm/Target/TargetOptions.h | 34 +- llvm/include/llvm/Target/TargetSelectionDAG.td | 28 +- llvm/include/llvm/TextAPI/Architecture.h | 6 +- llvm/include/llvm/TextAPI/ArchitectureSet.h | 6 +- llvm/include/llvm/TextAPI/InterfaceFile.h | 8 +- llvm/include/llvm/TextAPI/PackedVersion.h | 6 +- llvm/include/llvm/TextAPI/Platform.h | 6 +- llvm/include/llvm/TextAPI/Symbol.h | 6 +- llvm/include/llvm/TextAPI/Target.h | 6 +- llvm/include/llvm/TextAPI/TextAPIReader.h | 6 +- llvm/include/llvm/TextAPI/TextAPIWriter.h | 6 +- 
llvm/include/llvm/Transforms/IPO/Attributor.h | 214 ++- llvm/include/llvm/Transforms/IPO/FunctionAttrs.h | 8 + llvm/include/llvm/Transforms/IPO/FunctionImport.h | 29 +- llvm/include/llvm/Transforms/IPO/IROutliner.h | 36 +- llvm/include/llvm/Transforms/IPO/Inliner.h | 16 +- llvm/include/llvm/Transforms/IPO/LoopExtractor.h | 2 + llvm/include/llvm/Transforms/IPO/ModuleInliner.h | 51 + .../llvm/Transforms/IPO/PassManagerBuilder.h | 1 - .../llvm/Transforms/IPO/ProfiledCallGraph.h | 13 +- .../llvm/Transforms/IPO/SampleContextTracker.h | 55 +- .../llvm/Transforms/InstCombine/InstCombine.h | 10 +- .../Transforms/InstCombine/InstCombineWorklist.h | 128 -- .../llvm/Transforms/InstCombine/InstCombiner.h | 50 +- llvm/include/llvm/Transforms/Instrumentation.h | 6 +- .../Transforms/Instrumentation/AddressSanitizer.h | 48 +- .../Instrumentation/AddressSanitizerCommon.h | 79 +- .../Instrumentation/AddressSanitizerOptions.h | 7 +- .../Instrumentation/HWAddressSanitizer.h | 30 +- .../Transforms/Instrumentation/InstrOrderFile.h | 7 +- .../llvm/Transforms/Instrumentation/MemProfiler.h | 7 +- .../Transforms/Instrumentation/MemorySanitizer.h | 17 + .../Transforms/Instrumentation/ThreadSanitizer.h | 8 + llvm/include/llvm/Transforms/Scalar/EarlyCSE.h | 2 + llvm/include/llvm/Transforms/Scalar/GVN.h | 15 +- .../include/llvm/Transforms/Scalar/JumpThreading.h | 8 +- .../llvm/Transforms/Scalar/LoopPassManager.h | 94 +- .../llvm/Transforms/Scalar/LoopUnrollPass.h | 2 + .../llvm/Transforms/Scalar/LowerMatrixIntrinsics.h | 2 + .../llvm/Transforms/Scalar/MemCpyOptimizer.h | 9 +- .../llvm/Transforms/Scalar/MergedLoadStoreMotion.h | 2 + llvm/include/llvm/Transforms/Scalar/SROA.h | 4 +- .../llvm/Transforms/Scalar/SimpleLoopUnswitch.h | 3 + llvm/include/llvm/Transforms/Scalar/SimplifyCFG.h | 3 + .../llvm/Transforms/Utils/ASanStackFrameLayout.h | 12 +- .../llvm/Transforms/Utils/AddDiscriminators.h | 1 + .../llvm/Transforms/Utils/BasicBlockUtils.h | 30 +- llvm/include/llvm/Transforms/Utils/BuildLibCalls.h | 10 +- llvm/include/llvm/Transforms/Utils/Cloning.h | 4 +- llvm/include/llvm/Transforms/Utils/CodeExtractor.h | 18 + .../include/llvm/Transforms/Utils/CodeMoverUtils.h | 16 +- .../llvm/Transforms/Utils/EntryExitInstrumenter.h | 3 + .../llvm/Transforms/Utils/FunctionImportUtils.h | 3 - llvm/include/llvm/Transforms/Utils/GlobalStatus.h | 15 +- .../llvm/Transforms/Utils/InstructionWorklist.h | 123 ++ llvm/include/llvm/Transforms/Utils/Local.h | 40 +- llvm/include/llvm/Transforms/Utils/LoopPeel.h | 4 +- llvm/include/llvm/Transforms/Utils/LoopUtils.h | 56 +- .../include/llvm/Transforms/Utils/MemoryOpRemark.h | 7 +- llvm/include/llvm/Transforms/Utils/PredicateInfo.h | 6 +- .../include/llvm/Transforms/Utils/SSAUpdaterBulk.h | 4 - .../Transforms/Utils/SampleProfileLoaderBaseImpl.h | 70 +- .../Transforms/Utils/ScalarEvolutionExpander.h | 9 +- .../llvm/Transforms/Utils/SimplifyLibCalls.h | 2 - llvm/include/llvm/Transforms/Utils/UnrollLoop.h | 3 +- llvm/include/llvm/Transforms/Utils/ValueMapper.h | 11 +- .../Vectorize/LoopVectorizationLegality.h | 2 +- .../llvm/Transforms/Vectorize/LoopVectorize.h | 2 + .../llvm/Transforms/Vectorize/SLPVectorizer.h | 4 +- .../llvm/Transforms/Vectorize/VectorCombine.h | 10 +- .../llvm/WindowsManifest/WindowsManifestMerger.h | 3 +- llvm/include/llvm/module.modulemap | 18 +- 487 files changed, 18877 insertions(+), 13196 deletions(-) create mode 100644 llvm/include/llvm/ADT/CombinationGenerator.h create mode 100644 llvm/include/llvm/Analysis/CostModel.h create mode 100644 
llvm/include/llvm/Analysis/InlineOrder.h create mode 100644 llvm/include/llvm/CodeGen/CodeGenCommonISel.h create mode 100644 llvm/include/llvm/CodeGen/GlobalISel/LoadStoreOpt.h create mode 100644 llvm/include/llvm/CodeGen/MIRSampleProfile.h create mode 100644 llvm/include/llvm/ExecutionEngine/JITLink/ELF_aarch64.h create mode 100644 llvm/include/llvm/ExecutionEngine/JITLink/MemoryFlags.h create mode 100644 llvm/include/llvm/ExecutionEngine/JITLink/TableManager.h create mode 100644 llvm/include/llvm/ExecutionEngine/JITLink/aarch64.h create mode 100644 llvm/include/llvm/ExecutionEngine/Orc/DebuggerSupportPlugin.h create mode 100644 llvm/include/llvm/ExecutionEngine/Orc/ELFNixPlatform.h create mode 100644 llvm/include/llvm/ExecutionEngine/Orc/EPCGenericDylibManager.h create mode 100644 llvm/include/llvm/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.h create mode 100644 llvm/include/llvm/ExecutionEngine/Orc/EPCGenericMemoryAccess.h create mode 100644 llvm/include/llvm/ExecutionEngine/Orc/EPCGenericRTDyldMemoryManager.h delete mode 100644 llvm/include/llvm/ExecutionEngine/Orc/LLVMSPSSerializers.h create mode 100644 llvm/include/llvm/ExecutionEngine/Orc/LookupAndRecordAddrs.h delete mode 100644 llvm/include/llvm/ExecutionEngine/Orc/OrcRPCExecutorProcessControl.h delete mode 100644 llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h delete mode 100644 llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h delete mode 100644 llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetServer.h delete mode 100644 llvm/include/llvm/ExecutionEngine/Orc/Shared/FDRawByteChannel.h create mode 100644 llvm/include/llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h delete mode 100644 llvm/include/llvm/ExecutionEngine/Orc/Shared/RPCUtils.h delete mode 100644 llvm/include/llvm/ExecutionEngine/Orc/Shared/RawByteChannel.h delete mode 100644 llvm/include/llvm/ExecutionEngine/Orc/Shared/Serialization.h create mode 100644 llvm/include/llvm/ExecutionEngine/Orc/Shared/SimpleRemoteEPCUtils.h create mode 100644 llvm/include/llvm/ExecutionEngine/Orc/SimpleRemoteEPC.h create mode 100644 llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/ExecutorBootstrapService.h delete mode 100644 llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/OrcRPCTPCServer.h create mode 100644 llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/SimpleExecutorDylibManager.h create mode 100644 llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.h create mode 100644 llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/SimpleRemoteEPCServer.h create mode 100644 llvm/include/llvm/ExecutionEngine/Orc/TaskDispatch.h delete mode 100644 llvm/include/llvm/ExecutionEngine/OrcMCJITReplacement.h delete mode 100644 llvm/include/llvm/ExecutionEngine/OrcV1Deprecation.h delete mode 100644 llvm/include/llvm/IR/GlobalIndirectSymbol.h delete mode 100644 llvm/include/llvm/LTO/Caching.h create mode 100644 llvm/include/llvm/MC/MCAsmInfoGOFF.h create mode 100644 llvm/include/llvm/MC/TargetRegistry.h create mode 100644 llvm/include/llvm/MCA/View.h create mode 100644 llvm/include/llvm/Passes/OptimizationLevel.h create mode 100644 llvm/include/llvm/Support/Caching.h create mode 100644 llvm/include/llvm/Support/DivisionByConstantInfo.h create mode 100644 llvm/include/llvm/Support/HashBuilder.h create mode 100644 llvm/include/llvm/Support/MSP430AttributeParser.h create mode 100644 llvm/include/llvm/Support/MSP430Attributes.h create mode 100644 llvm/include/llvm/Support/PGOOptions.h create mode 100644 
llvm/include/llvm/Support/RISCVISAInfo.h delete mode 100644 llvm/include/llvm/Support/TargetRegistry.h create mode 100644 llvm/include/llvm/Transforms/IPO/ModuleInliner.h delete mode 100644 llvm/include/llvm/Transforms/InstCombine/InstCombineWorklist.h create mode 100644 llvm/include/llvm/Transforms/Utils/InstructionWorklist.h (limited to 'llvm/include') diff --git a/llvm/include/llvm-c/Comdat.h b/llvm/include/llvm-c/Comdat.h index 81cde1107fa4..8002bc0581af 100644 --- a/llvm/include/llvm-c/Comdat.h +++ b/llvm/include/llvm-c/Comdat.h @@ -19,6 +19,13 @@ LLVM_C_EXTERN_C_BEGIN +/** + * @defgroup LLVMCCoreComdat Comdats + * @ingroup LLVMCCore + * + * @{ + */ + typedef enum { LLVMAnyComdatSelectionKind, ///< The linker may choose any COMDAT. LLVMExactMatchComdatSelectionKind, ///< The data referenced by the COMDAT must @@ -66,6 +73,10 @@ LLVMComdatSelectionKind LLVMGetComdatSelectionKind(LLVMComdatRef C); */ void LLVMSetComdatSelectionKind(LLVMComdatRef C, LLVMComdatSelectionKind Kind); +/** + * @} + */ + LLVM_C_EXTERN_C_END #endif diff --git a/llvm/include/llvm-c/Core.h b/llvm/include/llvm-c/Core.h index 1a5e763cfc60..d170eff17951 100644 --- a/llvm/include/llvm-c/Core.h +++ b/llvm/include/llvm-c/Core.h @@ -1580,10 +1580,10 @@ LLVMTypeRef LLVMX86AMXType(void); macro(ConstantVector) \ macro(GlobalValue) \ macro(GlobalAlias) \ - macro(GlobalIFunc) \ macro(GlobalObject) \ macro(Function) \ macro(GlobalVariable) \ + macro(GlobalIFunc) \ macro(UndefValue) \ macro(PoisonValue) \ macro(Instruction) \ @@ -3287,7 +3287,7 @@ void LLVMSetInstructionCallConv(LLVMValueRef Instr, unsigned CC); */ unsigned LLVMGetInstructionCallConv(LLVMValueRef Instr); -void LLVMSetInstrParamAlignment(LLVMValueRef Instr, unsigned index, +void LLVMSetInstrParamAlignment(LLVMValueRef Instr, LLVMAttributeIndex Idx, unsigned Align); void LLVMAddCallSiteAttribute(LLVMValueRef C, LLVMAttributeIndex Idx, @@ -3611,10 +3611,20 @@ void LLVMSetCurrentDebugLocation2(LLVMBuilderRef Builder, LLVMMetadataRef Loc); * current debug location for the given builder. If the builder has no current * debug location, this function is a no-op. * + * @deprecated LLVMSetInstDebugLocation is deprecated in favor of the more general + * LLVMAddMetadataToInst. + * * @see llvm::IRBuilder::SetInstDebugLocation() */ void LLVMSetInstDebugLocation(LLVMBuilderRef Builder, LLVMValueRef Inst); +/** + * Adds the metadata registered with the given builder to the given instruction. + * + * @see llvm::IRBuilder::AddMetadataToInst() + */ +void LLVMAddMetadataToInst(LLVMBuilderRef Builder, LLVMValueRef Inst); + /** * Get the dafult floating-point math metadata for a given builder. * @@ -4081,6 +4091,7 @@ void LLVMDisposeMemoryBuffer(LLVMMemoryBufferRef MemBuf); /** * @defgroup LLVMCCorePassRegistry Pass Registry + * @ingroup LLVMCCore * * @{ */ @@ -4095,6 +4106,7 @@ LLVMPassRegistryRef LLVMGetGlobalPassRegistry(void); /** * @defgroup LLVMCCorePassManagers Pass Managers + * @ingroup LLVMCCore * * @{ */ diff --git a/llvm/include/llvm-c/DebugInfo.h b/llvm/include/llvm-c/DebugInfo.h index 8c085807914b..d7fb898b60d2 100644 --- a/llvm/include/llvm-c/DebugInfo.h +++ b/llvm/include/llvm-c/DebugInfo.h @@ -21,6 +21,13 @@ LLVM_C_EXTERN_C_BEGIN +/** + * @defgroup LLVMCCoreDebugInfo Debug Information + * @ingroup LLVMCCore + * + * @{ + */ + /** * Debug info flags. */ @@ -226,6 +233,13 @@ void LLVMDisposeDIBuilder(LLVMDIBuilderRef Builder); */ void LLVMDIBuilderFinalize(LLVMDIBuilderRef Builder); +/** + * Finalize a specific subprogram. 
+ * No new variables may be added to this subprogram afterwards. + */ +void LLVMDIBuilderFinalizeSubprogram(LLVMDIBuilderRef Builder, + LLVMMetadataRef Subprogram); + /** * A CompileUnit provides an anchor for all debugging * information generated during this instance of compilation. @@ -389,48 +403,48 @@ LLVMDIBuilderCreateImportedModuleFromNamespace(LLVMDIBuilderRef Builder, * \param ImportedEntity Previous imported entity to alias. * \param File File where the declaration is located. * \param Line Line number of the declaration. + * \param Elements Renamed elements. + * \param NumElements Number of renamed elements. */ -LLVMMetadataRef -LLVMDIBuilderCreateImportedModuleFromAlias(LLVMDIBuilderRef Builder, - LLVMMetadataRef Scope, - LLVMMetadataRef ImportedEntity, - LLVMMetadataRef File, - unsigned Line); +LLVMMetadataRef LLVMDIBuilderCreateImportedModuleFromAlias( + LLVMDIBuilderRef Builder, LLVMMetadataRef Scope, + LLVMMetadataRef ImportedEntity, LLVMMetadataRef File, unsigned Line, + LLVMMetadataRef *Elements, unsigned NumElements); /** * Create a descriptor for an imported module. - * \param Builder The \c DIBuilder. - * \param Scope The scope this module is imported into - * \param M The module being imported here - * \param File File where the declaration is located. - * \param Line Line number of the declaration. + * \param Builder The \c DIBuilder. + * \param Scope The scope this module is imported into + * \param M The module being imported here + * \param File File where the declaration is located. + * \param Line Line number of the declaration. + * \param Elements Renamed elements. + * \param NumElements Number of renamed elements. */ -LLVMMetadataRef -LLVMDIBuilderCreateImportedModuleFromModule(LLVMDIBuilderRef Builder, - LLVMMetadataRef Scope, - LLVMMetadataRef M, - LLVMMetadataRef File, - unsigned Line); +LLVMMetadataRef LLVMDIBuilderCreateImportedModuleFromModule( + LLVMDIBuilderRef Builder, LLVMMetadataRef Scope, LLVMMetadataRef M, + LLVMMetadataRef File, unsigned Line, LLVMMetadataRef *Elements, + unsigned NumElements); /** * Create a descriptor for an imported function, type, or variable. Suitable * for e.g. FORTRAN-style USE declarations. - * \param Builder The DIBuilder. - * \param Scope The scope this module is imported into. - * \param Decl The declaration (or definition) of a function, type, - or variable. - * \param File File where the declaration is located. - * \param Line Line number of the declaration. - * \param Name A name that uniquely identifies this imported declaration. - * \param NameLen The length of the C string passed to \c Name. + * \param Builder The DIBuilder. + * \param Scope The scope this module is imported into. + * \param Decl The declaration (or definition) of a function, type, + or variable. + * \param File File where the declaration is located. + * \param Line Line number of the declaration. + * \param Name A name that uniquely identifies this imported + declaration. + * \param NameLen The length of the C string passed to \c Name. + * \param Elements Renamed elements. + * \param NumElements Number of renamed elements. 
*/ -LLVMMetadataRef -LLVMDIBuilderCreateImportedDeclaration(LLVMDIBuilderRef Builder, - LLVMMetadataRef Scope, - LLVMMetadataRef Decl, - LLVMMetadataRef File, - unsigned Line, - const char *Name, size_t NameLen); +LLVMMetadataRef LLVMDIBuilderCreateImportedDeclaration( + LLVMDIBuilderRef Builder, LLVMMetadataRef Scope, LLVMMetadataRef Decl, + LLVMMetadataRef File, unsigned Line, const char *Name, size_t NameLen, + LLVMMetadataRef *Elements, unsigned NumElements); /** * Creates a new DebugLocation that describes a source location. @@ -1360,6 +1374,10 @@ void LLVMInstructionSetDebugLoc(LLVMValueRef Inst, LLVMMetadataRef Loc); */ LLVMMetadataKind LLVMGetMetadataKind(LLVMMetadataRef Metadata); +/** + * @} + */ + LLVM_C_EXTERN_C_END #endif diff --git a/llvm/include/llvm-c/DisassemblerTypes.h b/llvm/include/llvm-c/DisassemblerTypes.h index ae5c68227594..53baaef11033 100644 --- a/llvm/include/llvm-c/DisassemblerTypes.h +++ b/llvm/include/llvm-c/DisassemblerTypes.h @@ -17,6 +17,12 @@ #include #endif +/** + * @addtogroup LLVMCDisassembler + * + * @{ + */ + /** * An opaque reference to a disassembler context. */ @@ -157,4 +163,8 @@ typedef const char *(*LLVMSymbolLookupCallback)(void *DisInfo, /* The output reference is to a C++ symbol name. */ #define LLVMDisassembler_ReferenceType_DeMangled_Name 9 +/** + * @} + */ + #endif diff --git a/llvm/include/llvm-c/Error.h b/llvm/include/llvm-c/Error.h index bc702ac7a1bf..c3baaf65186a 100644 --- a/llvm/include/llvm-c/Error.h +++ b/llvm/include/llvm-c/Error.h @@ -18,6 +18,13 @@ LLVM_C_EXTERN_C_BEGIN +/** + * @defgroup LLVMCError Error Handling + * @ingroup LLVMC + * + * @{ + */ + #define LLVMErrorSuccess 0 /** @@ -67,6 +74,10 @@ LLVMErrorTypeId LLVMGetStringErrorTypeId(void); */ LLVMErrorRef LLVMCreateStringError(const char *ErrMsg); +/** + * @} + */ + LLVM_C_EXTERN_C_END #endif diff --git a/llvm/include/llvm-c/ErrorHandling.h b/llvm/include/llvm-c/ErrorHandling.h index 5ba099c209c0..d9b9f22752b8 100644 --- a/llvm/include/llvm-c/ErrorHandling.h +++ b/llvm/include/llvm-c/ErrorHandling.h @@ -18,6 +18,12 @@ LLVM_C_EXTERN_C_BEGIN +/** + * @addtogroup LLVMCError + * + * @{ + */ + typedef void (*LLVMFatalErrorHandler)(const char *Reason); /** @@ -42,6 +48,10 @@ void LLVMResetFatalErrorHandler(void); */ void LLVMEnablePrettyStackTrace(void); +/** + * @} + */ + LLVM_C_EXTERN_C_END #endif diff --git a/llvm/include/llvm-c/IRReader.h b/llvm/include/llvm-c/IRReader.h index 5a3f633c3d91..905b84fa5a86 100644 --- a/llvm/include/llvm-c/IRReader.h +++ b/llvm/include/llvm-c/IRReader.h @@ -19,6 +19,13 @@ LLVM_C_EXTERN_C_BEGIN +/** + * @defgroup LLVMCCoreIRReader IR Reader + * @ingroup LLVMCCore + * + * @{ + */ + /** * Read LLVM IR from a memory buffer and convert it into an in-memory Module * object. Returns 0 on success. @@ -32,6 +39,10 @@ LLVMBool LLVMParseIRInContext(LLVMContextRef ContextRef, LLVMMemoryBufferRef MemBuf, LLVMModuleRef *OutM, char **OutMessage); +/** + * @} + */ + LLVM_C_EXTERN_C_END #endif diff --git a/llvm/include/llvm-c/LLJIT.h b/llvm/include/llvm-c/LLJIT.h index f689ca0f1cf0..a06133aac4fb 100644 --- a/llvm/include/llvm-c/LLJIT.h +++ b/llvm/include/llvm-c/LLJIT.h @@ -31,6 +31,13 @@ LLVM_C_EXTERN_C_BEGIN +/** + * @defgroup LLVMCExecutionEngineLLJIT LLJIT + * @ingroup LLVMCExecutionEngine + * + * @{ + */ + /** * A function for constructing an ObjectLinkingLayer instance to be used * by an LLJIT instance. 
@@ -235,6 +242,10 @@ LLVMOrcIRTransformLayerRef LLVMOrcLLJITGetIRTransformLayer(LLVMOrcLLJITRef J); */ const char *LLVMOrcLLJITGetDataLayoutStr(LLVMOrcLLJITRef J); +/** + * @} + */ + LLVM_C_EXTERN_C_END #endif /* LLVM_C_LLJIT_H */ diff --git a/llvm/include/llvm-c/Linker.h b/llvm/include/llvm-c/Linker.h index 1ad9cc958753..acff5d5e2225 100644 --- a/llvm/include/llvm-c/Linker.h +++ b/llvm/include/llvm-c/Linker.h @@ -19,6 +19,13 @@ LLVM_C_EXTERN_C_BEGIN +/** + * @defgroup LLVMCCoreLinker Linker + * @ingroup LLVMCCore + * + * @{ + */ + /* This enum is provided for backwards-compatibility only. It has no effect. */ typedef enum { LLVMLinkerDestroySource = 0, /* This is the default behavior. */ @@ -35,4 +42,8 @@ LLVMBool LLVMLinkModules2(LLVMModuleRef Dest, LLVMModuleRef Src); LLVM_C_EXTERN_C_END +/** + * @} + */ + #endif diff --git a/llvm/include/llvm-c/Orc.h b/llvm/include/llvm-c/Orc.h index 1790afbcecc7..e2f30b7cdf45 100644 --- a/llvm/include/llvm-c/Orc.h +++ b/llvm/include/llvm-c/Orc.h @@ -33,6 +33,13 @@ LLVM_C_EXTERN_C_BEGIN +/** + * @defgroup LLVMCExecutionEngineORC On-Request-Compilation + * @ingroup LLVMCExecutionEngine + * + * @{ + */ + /** * Represents an address in the executor process. */ @@ -920,6 +927,49 @@ LLVMErrorRef LLVMOrcCreateDynamicLibrarySearchGeneratorForProcess( LLVMOrcDefinitionGeneratorRef *Result, char GlobalPrefx, LLVMOrcSymbolPredicate Filter, void *FilterCtx); +/** + * Get a LLVMOrcCreateDynamicLibararySearchGeneratorForPath that will reflect + * library symbols into the JITDylib. On success the resulting generator is + * owned by the client. Ownership is typically transferred by adding the + * instance to a JITDylib using LLVMOrcJITDylibAddGenerator, + * + * The GlobalPrefix argument specifies the character that appears on the front + * of linker-mangled symbols for the target platform (e.g. '_' on MachO). + * If non-null, this character will be stripped from the start of all symbol + * strings before passing the remaining substring to dlsym. + * + * The optional Filter and Ctx arguments can be used to supply a symbol name + * filter: Only symbols for which the filter returns true will be visible to + * JIT'd code. If the Filter argument is null then all library symbols will + * be visible to JIT'd code. Note that the symbol name passed to the Filter + * function is the full mangled symbol: The client is responsible for stripping + * the global prefix if present. + * + * THIS API IS EXPERIMENTAL AND LIKELY TO CHANGE IN THE NEAR FUTURE! + * + */ +LLVMErrorRef LLVMOrcCreateDynamicLibrarySearchGeneratorForPath( + LLVMOrcDefinitionGeneratorRef *Result, const char *FileName, + char GlobalPrefix, LLVMOrcSymbolPredicate Filter, void *FilterCtx); + +/** + * Get a LLVMOrcCreateStaticLibrarySearchGeneratorForPath that will reflect + * static library symbols into the JITDylib. On success the resulting + * generator is owned by the client. Ownership is typically transferred by + * adding the instance to a JITDylib using LLVMOrcJITDylibAddGenerator, + * + * Call with the optional TargetTriple argument will succeed if the file at + * the given path is a static library or a MachO universal binary containing a + * static library that is compatible with the given triple. Otherwise it will + * return an error. + * + * THIS API IS EXPERIMENTAL AND LIKELY TO CHANGE IN THE NEAR FUTURE! 
+ * + */ +LLVMErrorRef LLVMOrcCreateStaticLibrarySearchGeneratorForPath( + LLVMOrcDefinitionGeneratorRef *Result, LLVMOrcObjectLayerRef ObjLayer, + const char *FileName, const char *TargetTriple); + /** * Create a ThreadSafeContext containing a new LLVMContext. * @@ -1133,6 +1183,10 @@ void LLVMOrcDisposeDumpObjects(LLVMOrcDumpObjectsRef DumpObjects); LLVMErrorRef LLVMOrcDumpObjects_CallOperator(LLVMOrcDumpObjectsRef DumpObjects, LLVMMemoryBufferRef *ObjBuffer); +/** + * @} + */ + LLVM_C_EXTERN_C_END #endif /* LLVM_C_ORC_H */ diff --git a/llvm/include/llvm-c/OrcEE.h b/llvm/include/llvm-c/OrcEE.h index 2435e7421a42..e7ae0f5e6be2 100644 --- a/llvm/include/llvm-c/OrcEE.h +++ b/llvm/include/llvm-c/OrcEE.h @@ -32,6 +32,13 @@ LLVM_C_EXTERN_C_BEGIN +/** + * @defgroup LLVMCExecutionEngineORCEE ExecutionEngine-based ORC Utils + * @ingroup LLVMCExecutionEngine + * + * @{ + */ + /** * Create a RTDyldObjectLinkingLayer instance using the standard * SectionMemoryManager for memory management. @@ -50,6 +57,10 @@ void LLVMOrcRTDyldObjectLinkingLayerRegisterJITEventListener( LLVMOrcObjectLayerRef RTDyldObjLinkingLayer, LLVMJITEventListenerRef Listener); +/** + * @} + */ + LLVM_C_EXTERN_C_END #endif /* LLVM_C_ORCEE_H */ diff --git a/llvm/include/llvm-c/Support.h b/llvm/include/llvm-c/Support.h index 866df32efa98..17657861b32b 100644 --- a/llvm/include/llvm-c/Support.h +++ b/llvm/include/llvm-c/Support.h @@ -20,6 +20,12 @@ LLVM_C_EXTERN_C_BEGIN +/** + * @addtogroup LLVMCCore + * + * @{ + */ + /** * This function permanently loads the dynamic library at the given path. * It is safe to call this function multiple times for the same library. @@ -57,6 +63,10 @@ void *LLVMSearchForAddressOfSymbol(const char *symbolName); */ void LLVMAddSymbol(const char *symbolName, void *symbolValue); +/** + * @} + */ + LLVM_C_EXTERN_C_END #endif diff --git a/llvm/include/llvm-c/TargetMachine.h b/llvm/include/llvm-c/TargetMachine.h index f82edd948b59..23c8c63ff0b4 100644 --- a/llvm/include/llvm-c/TargetMachine.h +++ b/llvm/include/llvm-c/TargetMachine.h @@ -25,6 +25,12 @@ LLVM_C_EXTERN_C_BEGIN +/** + * @addtogroup LLVMCTarget + * + * @{ + */ + typedef struct LLVMOpaqueTargetMachine *LLVMTargetMachineRef; typedef struct LLVMTarget *LLVMTargetRef; @@ -156,6 +162,10 @@ char* LLVMGetHostCPUFeatures(void); /** Adds the target-specific analysis passes to the pass manager. */ void LLVMAddAnalysisPasses(LLVMTargetMachineRef T, LLVMPassManagerRef PM); +/** + * @} + */ + LLVM_C_EXTERN_C_END #endif diff --git a/llvm/include/llvm-c/Transforms/PassBuilder.h b/llvm/include/llvm-c/Transforms/PassBuilder.h index 5635f10d6877..6d9f1b45c707 100644 --- a/llvm/include/llvm-c/Transforms/PassBuilder.h +++ b/llvm/include/llvm-c/Transforms/PassBuilder.h @@ -18,6 +18,13 @@ #include "llvm-c/TargetMachine.h" #include "llvm-c/Types.h" +/** + * @defgroup LLVMCCoreNewPM New Pass Manager + * @ingroup LLVMCCore + * + * @{ + */ + LLVM_C_EXTERN_C_BEGIN /** @@ -50,7 +57,7 @@ LLVMErrorRef LLVMRunPasses(LLVMModuleRef M, const char *Passes, * responsible for it. The client should call LLVMDisposePassBuilderOptions * to free the pass builder options. 
*/ -LLVMPassBuilderOptionsRef LLVMCreatePassBuilderOptions(); +LLVMPassBuilderOptionsRef LLVMCreatePassBuilderOptions(void); /** * Toggle adding the VerifierPass for the PassBuilder, ensuring all functions @@ -97,6 +104,10 @@ void LLVMPassBuilderOptionsSetMergeFunctions(LLVMPassBuilderOptionsRef Options, */ void LLVMDisposePassBuilderOptions(LLVMPassBuilderOptionsRef Options); +/** + * @} + */ + LLVM_C_EXTERN_C_END #endif // LLVM_C_TRANSFORMS_PASSBUILDER_H diff --git a/llvm/include/llvm-c/lto.h b/llvm/include/llvm-c/lto.h index f6fc8588f5f7..5ceb02224d2b 100644 --- a/llvm/include/llvm-c/lto.h +++ b/llvm/include/llvm-c/lto.h @@ -46,7 +46,7 @@ typedef bool lto_bool_t; * @{ */ -#define LTO_API_VERSION 28 +#define LTO_API_VERSION 29 /** * \since prior to LTO_API_VERSION=3 @@ -312,6 +312,16 @@ extern lto_bool_t lto_module_get_macho_cputype(lto_module_t mod, unsigned int *out_cputype, unsigned int *out_cpusubtype); +/** + * This function can be used by the linker to check if a given module has + * any constructor or destructor functions. + * + * Returns true if the module has either the @llvm.global_ctors or the + * @llvm.global_dtors symbol. Otherwise returns false. + * + * \since LTO_API_VERSION=29 + */ +extern lto_bool_t lto_module_has_ctor_dtor(lto_module_t mod); /** * Diagnostic severity. * diff --git a/llvm/include/llvm/ADT/APFloat.h b/llvm/include/llvm/ADT/APFloat.h index f493a03b4b87..40e0e32c77a8 100644 --- a/llvm/include/llvm/ADT/APFloat.h +++ b/llvm/include/llvm/ADT/APFloat.h @@ -961,9 +961,7 @@ public: /// Returns a float which is bitcasted from an all one value int. /// /// \param Semantics - type float semantics - /// \param BitWidth - Select float type - static APFloat getAllOnesValue(const fltSemantics &Semantics, - unsigned BitWidth); + static APFloat getAllOnesValue(const fltSemantics &Semantics); /// Used to insert APFloat objects, or objects that contain APFloat objects, /// into FoldingSets. diff --git a/llvm/include/llvm/ADT/APInt.h b/llvm/include/llvm/ADT/APInt.h index ff586f763e82..595cd94b6b8f 100644 --- a/llvm/include/llvm/ADT/APInt.h +++ b/llvm/include/llvm/ADT/APInt.h @@ -31,7 +31,7 @@ class raw_ostream; template class SmallVectorImpl; template class ArrayRef; template class Optional; -template struct DenseMapInfo; +template struct DenseMapInfo; class APInt; @@ -66,6 +66,11 @@ inline APInt operator-(APInt); /// not. /// * In general, the class tries to follow the style of computation that LLVM /// uses in its IR. This simplifies its use for LLVM. +/// * APInt supports zero-bit-width values, but operations that require bits +/// are not defined on it (e.g. you cannot ask for the sign of a zero-bit +/// integer). This means that operations like zero extension and logical +/// shifts are defined, but sign extension and ashr is not. Zero bit values +/// compare and hash equal to themselves, and countLeadingZeros returns 0. /// class LLVM_NODISCARD APInt { public: @@ -87,176 +92,6 @@ public: static constexpr WordType WORDTYPE_MAX = ~WordType(0); -private: - /// This union is used to store the integer value. When the - /// integer bit-width <= 64, it uses VAL, otherwise it uses pVal. - union { - uint64_t VAL; ///< Used to store the <= 64 bits integer value. - uint64_t *pVal; ///< Used to store the >64 bits integer value. - } U; - - unsigned BitWidth; ///< The number of bits in this APInt. 
- - friend struct DenseMapInfo; - - friend class APSInt; - - /// Fast internal constructor - /// - /// This constructor is used only internally for speed of construction of - /// temporaries. It is unsafe for general use so it is not public. - APInt(uint64_t *val, unsigned bits) : BitWidth(bits) { - U.pVal = val; - } - - /// Determine which word a bit is in. - /// - /// \returns the word position for the specified bit position. - static unsigned whichWord(unsigned bitPosition) { - return bitPosition / APINT_BITS_PER_WORD; - } - - /// Determine which bit in a word a bit is in. - /// - /// \returns the bit position in a word for the specified bit position - /// in the APInt. - static unsigned whichBit(unsigned bitPosition) { - return bitPosition % APINT_BITS_PER_WORD; - } - - /// Get a single bit mask. - /// - /// \returns a uint64_t with only bit at "whichBit(bitPosition)" set - /// This method generates and returns a uint64_t (word) mask for a single - /// bit at a specific bit position. This is used to mask the bit in the - /// corresponding word. - static uint64_t maskBit(unsigned bitPosition) { - return 1ULL << whichBit(bitPosition); - } - - /// Clear unused high order bits - /// - /// This method is used internally to clear the top "N" bits in the high order - /// word that are not used by the APInt. This is needed after the most - /// significant word is assigned a value to ensure that those bits are - /// zero'd out. - APInt &clearUnusedBits() { - // Compute how many bits are used in the final word - unsigned WordBits = ((BitWidth-1) % APINT_BITS_PER_WORD) + 1; - - // Mask out the high bits. - uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - WordBits); - if (isSingleWord()) - U.VAL &= mask; - else - U.pVal[getNumWords() - 1] &= mask; - return *this; - } - - /// Get the word corresponding to a bit position - /// \returns the corresponding word for the specified bit position. - uint64_t getWord(unsigned bitPosition) const { - return isSingleWord() ? U.VAL : U.pVal[whichWord(bitPosition)]; - } - - /// Utility method to change the bit width of this APInt to new bit width, - /// allocating and/or deallocating as necessary. There is no guarantee on the - /// value of any bits upon return. Caller should populate the bits after. - void reallocate(unsigned NewBitWidth); - - /// Convert a char array into an APInt - /// - /// \param radix 2, 8, 10, 16, or 36 - /// Converts a string into a number. The string must be non-empty - /// and well-formed as a number of the given base. The bit-width - /// must be sufficient to hold the result. - /// - /// This is used by the constructors that take string arguments. - /// - /// StringRef::getAsInteger is superficially similar but (1) does - /// not assume that the string is well-formed and (2) grows the - /// result to hold the input. - void fromString(unsigned numBits, StringRef str, uint8_t radix); - - /// An internal division function for dividing APInts. - /// - /// This is used by the toString method to divide by the radix. It simply - /// provides a more convenient form of divide for internal use since KnuthDiv - /// has specific constraints on its inputs. If those constraints are not met - /// then it provides a simpler form of divide. 
- static void divide(const WordType *LHS, unsigned lhsWords, - const WordType *RHS, unsigned rhsWords, WordType *Quotient, - WordType *Remainder); - - /// out-of-line slow case for inline constructor - void initSlowCase(uint64_t val, bool isSigned); - - /// shared code between two array constructors - void initFromArray(ArrayRef array); - - /// out-of-line slow case for inline copy constructor - void initSlowCase(const APInt &that); - - /// out-of-line slow case for shl - void shlSlowCase(unsigned ShiftAmt); - - /// out-of-line slow case for lshr. - void lshrSlowCase(unsigned ShiftAmt); - - /// out-of-line slow case for ashr. - void ashrSlowCase(unsigned ShiftAmt); - - /// out-of-line slow case for operator= - void AssignSlowCase(const APInt &RHS); - - /// out-of-line slow case for operator== - bool EqualSlowCase(const APInt &RHS) const LLVM_READONLY; - - /// out-of-line slow case for countLeadingZeros - unsigned countLeadingZerosSlowCase() const LLVM_READONLY; - - /// out-of-line slow case for countLeadingOnes. - unsigned countLeadingOnesSlowCase() const LLVM_READONLY; - - /// out-of-line slow case for countTrailingZeros. - unsigned countTrailingZerosSlowCase() const LLVM_READONLY; - - /// out-of-line slow case for countTrailingOnes - unsigned countTrailingOnesSlowCase() const LLVM_READONLY; - - /// out-of-line slow case for countPopulation - unsigned countPopulationSlowCase() const LLVM_READONLY; - - /// out-of-line slow case for intersects. - bool intersectsSlowCase(const APInt &RHS) const LLVM_READONLY; - - /// out-of-line slow case for isSubsetOf. - bool isSubsetOfSlowCase(const APInt &RHS) const LLVM_READONLY; - - /// out-of-line slow case for setBits. - void setBitsSlowCase(unsigned loBit, unsigned hiBit); - - /// out-of-line slow case for flipAllBits. - void flipAllBitsSlowCase(); - - /// out-of-line slow case for operator&=. - void AndAssignSlowCase(const APInt& RHS); - - /// out-of-line slow case for operator|=. - void OrAssignSlowCase(const APInt& RHS); - - /// out-of-line slow case for operator^=. - void XorAssignSlowCase(const APInt& RHS); - - /// Unsigned comparison. Returns -1, 0, or 1 if this APInt is less than, equal - /// to, or greater than RHS. - int compare(const APInt &RHS) const LLVM_READONLY; - - /// Signed comparison. Returns -1, 0, or 1 if this APInt is less than, equal - /// to, or greater than RHS. - int compareSigned(const APInt &RHS) const LLVM_READONLY; - -public: /// \name Constructors /// @{ @@ -272,7 +107,6 @@ public: /// \param isSigned how to treat signedness of val APInt(unsigned numBits, uint64_t val, bool isSigned = false) : BitWidth(numBits) { - assert(BitWidth && "bitwidth too small"); if (isSingleWord()) { U.VAL = val; clearUnusedBits(); @@ -312,7 +146,9 @@ public: /// \param radix the radix to use for the conversion APInt(unsigned numBits, StringRef str, uint8_t radix); - /// Simply makes *this a copy of that. + /// Default constructor that creates an APInt with a 1-bit zero value. + explicit APInt() : BitWidth(1) { U.VAL = 0; } + /// Copy Constructor. APInt(const APInt &that) : BitWidth(that.BitWidth) { if (isSingleWord()) @@ -333,19 +169,131 @@ public: delete[] U.pVal; } - /// Default constructor that creates an uninteresting APInt - /// representing a 1-bit zero value. + /// @} + /// \name Value Generators + /// @{ + + /// Get the '0' value for the specified bit-width. + static APInt getZero(unsigned numBits) { return APInt(numBits, 0); } + + /// NOTE: This is soft-deprecated. Please use `getZero()` instead. 
+ static APInt getNullValue(unsigned numBits) { return getZero(numBits); } + + /// Return an APInt zero bits wide. + static APInt getZeroWidth() { return getZero(0); } + + /// Gets maximum unsigned value of APInt for specific bit width. + static APInt getMaxValue(unsigned numBits) { return getAllOnes(numBits); } + + /// Gets maximum signed value of APInt for a specific bit width. + static APInt getSignedMaxValue(unsigned numBits) { + APInt API = getAllOnes(numBits); + API.clearBit(numBits - 1); + return API; + } + + /// Gets minimum unsigned value of APInt for a specific bit width. + static APInt getMinValue(unsigned numBits) { return APInt(numBits, 0); } + + /// Gets minimum signed value of APInt for a specific bit width. + static APInt getSignedMinValue(unsigned numBits) { + APInt API(numBits, 0); + API.setBit(numBits - 1); + return API; + } + + /// Get the SignMask for a specific bit width. /// - /// This is useful for object deserialization (pair this with the static - /// method Read). - explicit APInt() : BitWidth(1) { U.VAL = 0; } + /// This is just a wrapper function of getSignedMinValue(), and it helps code + /// readability when we want to get a SignMask. + static APInt getSignMask(unsigned BitWidth) { + return getSignedMinValue(BitWidth); + } - /// Returns whether this instance allocated memory. - bool needsCleanup() const { return !isSingleWord(); } + /// Return an APInt of a specified width with all bits set. + static APInt getAllOnes(unsigned numBits) { + return APInt(numBits, WORDTYPE_MAX, true); + } - /// Used to insert APInt objects, or objects that contain APInt objects, into - /// FoldingSets. - void Profile(FoldingSetNodeID &id) const; + /// NOTE: This is soft-deprecated. Please use `getAllOnes()` instead. + static APInt getAllOnesValue(unsigned numBits) { return getAllOnes(numBits); } + + /// Return an APInt with exactly one bit set in the result. + static APInt getOneBitSet(unsigned numBits, unsigned BitNo) { + APInt Res(numBits, 0); + Res.setBit(BitNo); + return Res; + } + + /// Get a value with a block of bits set. + /// + /// Constructs an APInt value that has a contiguous range of bits set. The + /// bits from loBit (inclusive) to hiBit (exclusive) will be set. All other + /// bits will be zero. For example, with parameters(32, 0, 16) you would get + /// 0x0000FFFF. Please call getBitsSetWithWrap if \p loBit may be greater than + /// \p hiBit. + /// + /// \param numBits the intended bit width of the result + /// \param loBit the index of the lowest bit set. + /// \param hiBit the index of the highest bit set. + /// + /// \returns An APInt value with the requested bits set. + static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit) { + APInt Res(numBits, 0); + Res.setBits(loBit, hiBit); + return Res; + } + + /// Wrap version of getBitsSet. + /// If \p hiBit is bigger than \p loBit, this is same with getBitsSet. + /// If \p hiBit is not bigger than \p loBit, the set bits "wrap". For example, + /// with parameters (32, 28, 4), you would get 0xF000000F. + /// If \p hiBit is equal to \p loBit, you would get a result with all bits + /// set. + static APInt getBitsSetWithWrap(unsigned numBits, unsigned loBit, + unsigned hiBit) { + APInt Res(numBits, 0); + Res.setBitsWithWrap(loBit, hiBit); + return Res; + } + + /// Constructs an APInt value that has a contiguous range of bits set. The + /// bits from loBit (inclusive) to numBits (exclusive) will be set. All other + /// bits will be zero. 
For example, with parameters(32, 12) you would get + /// 0xFFFFF000. + /// + /// \param numBits the intended bit width of the result + /// \param loBit the index of the lowest bit to set. + /// + /// \returns An APInt value with the requested bits set. + static APInt getBitsSetFrom(unsigned numBits, unsigned loBit) { + APInt Res(numBits, 0); + Res.setBitsFrom(loBit); + return Res; + } + + /// Constructs an APInt value that has the top hiBitsSet bits set. + /// + /// \param numBits the bitwidth of the result + /// \param hiBitsSet the number of high-order bits set in the result. + static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet) { + APInt Res(numBits, 0); + Res.setHighBits(hiBitsSet); + return Res; + } + + /// Constructs an APInt value that has the bottom loBitsSet bits set. + /// + /// \param numBits the bitwidth of the result + /// \param loBitsSet the number of low-order bits set in the result. + static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet) { + APInt Res(numBits, 0); + Res.setLowBits(loBitsSet); + return Res; + } + + /// Return a value containing V broadcasted over NewLen bits. + static APInt getSplat(unsigned NewLen, const APInt &V); /// @} /// \name Value Tests @@ -373,7 +321,7 @@ public: /// This tests the high bit of this APInt to determine if it is set. /// /// \returns true if this APInt has its sign bit set, false otherwise. - bool isSignBitSet() const { return (*this)[BitWidth-1]; } + bool isSignBitSet() const { return (*this)[BitWidth - 1]; } /// Determine if sign bit of this APInt is clear. /// @@ -388,50 +336,62 @@ public: /// that 0 is not a positive value. /// /// \returns true if this APInt is positive. - bool isStrictlyPositive() const { return isNonNegative() && !isNullValue(); } + bool isStrictlyPositive() const { return isNonNegative() && !isZero(); } /// Determine if this APInt Value is non-positive (<= 0). /// /// \returns true if this APInt is non-positive. bool isNonPositive() const { return !isStrictlyPositive(); } - /// Determine if all bits are set - /// - /// This checks to see if the value has all bits of the APInt are set or not. - bool isAllOnesValue() const { + /// Determine if all bits are set. This is true for zero-width values. + bool isAllOnes() const { + if (BitWidth == 0) + return true; if (isSingleWord()) return U.VAL == WORDTYPE_MAX >> (APINT_BITS_PER_WORD - BitWidth); return countTrailingOnesSlowCase() == BitWidth; } - /// Determine if all bits are clear - /// - /// This checks to see if the value has all bits of the APInt are clear or - /// not. - bool isNullValue() const { return !*this; } + /// NOTE: This is soft-deprecated. Please use `isAllOnes()` instead. + bool isAllOnesValue() const { return isAllOnes(); } + + /// Determine if this value is zero, i.e. all bits are clear. + bool isZero() const { + if (isSingleWord()) + return U.VAL == 0; + return countLeadingZerosSlowCase() == BitWidth; + } + + /// NOTE: This is soft-deprecated. Please use `isZero()` instead. + bool isNullValue() const { return isZero(); } /// Determine if this is a value of 1. /// /// This checks to see if the value of this APInt is one. - bool isOneValue() const { + bool isOne() const { if (isSingleWord()) return U.VAL == 1; return countLeadingZerosSlowCase() == BitWidth - 1; } + /// NOTE: This is soft-deprecated. Please use `isOne()` instead. + bool isOneValue() const { return isOne(); } + /// Determine if this is the largest unsigned value. 
/// /// This checks to see if the value of this APInt is the maximum unsigned /// value for the APInt's bit width. - bool isMaxValue() const { return isAllOnesValue(); } + bool isMaxValue() const { return isAllOnes(); } /// Determine if this is the largest signed value. /// /// This checks to see if the value of this APInt is the maximum signed /// value for the APInt's bit width. bool isMaxSignedValue() const { - if (isSingleWord()) + if (isSingleWord()) { + assert(BitWidth && "zero width values not allowed"); return U.VAL == ((WordType(1) << (BitWidth - 1)) - 1); + } return !isNegative() && countTrailingOnesSlowCase() == BitWidth - 1; } @@ -439,39 +399,48 @@ public: /// /// This checks to see if the value of this APInt is the minimum unsigned /// value for the APInt's bit width. - bool isMinValue() const { return isNullValue(); } + bool isMinValue() const { return isZero(); } /// Determine if this is the smallest signed value. /// /// This checks to see if the value of this APInt is the minimum signed /// value for the APInt's bit width. bool isMinSignedValue() const { - if (isSingleWord()) + if (isSingleWord()) { + assert(BitWidth && "zero width values not allowed"); return U.VAL == (WordType(1) << (BitWidth - 1)); + } return isNegative() && countTrailingZerosSlowCase() == BitWidth - 1; } /// Check if this APInt has an N-bits unsigned integer value. - bool isIntN(unsigned N) const { - assert(N && "N == 0 ???"); - return getActiveBits() <= N; - } + bool isIntN(unsigned N) const { return getActiveBits() <= N; } /// Check if this APInt has an N-bits signed integer value. - bool isSignedIntN(unsigned N) const { - assert(N && "N == 0 ???"); - return getMinSignedBits() <= N; - } + bool isSignedIntN(unsigned N) const { return getMinSignedBits() <= N; } /// Check if this APInt's value is a power of two greater than zero. /// /// \returns true if the argument APInt value is a power of two > 0. bool isPowerOf2() const { - if (isSingleWord()) + if (isSingleWord()) { + assert(BitWidth && "zero width values not allowed"); return isPowerOf2_64(U.VAL); + } return countPopulationSlowCase() == 1; } + /// Check if this APInt's negated value is a power of two greater than zero. + bool isNegatedPowerOf2() const { + assert(BitWidth && "zero width values not allowed"); + if (isNonNegative()) + return false; + // NegatedPowerOf2 - shifted mask in the top bits. + unsigned LO = countLeadingOnes(); + unsigned TZ = countTrailingZeros(); + return (LO + TZ) == BitWidth; + } + /// Check if the APInt's value is returned by getSignMask. /// /// \returns true if this is the value returned by getSignMask. @@ -480,7 +449,7 @@ public: /// Convert APInt to a boolean value. /// /// This converts the APInt to a boolean value as a test against zero. - bool getBoolValue() const { return !!*this; } + bool getBoolValue() const { return !isZero(); } /// If this value is smaller than the specified limit, return it, otherwise /// return the limit value. This causes the value to saturate to the limit. @@ -503,175 +472,45 @@ public: if (isSingleWord()) return U.VAL == (WORDTYPE_MAX >> (APINT_BITS_PER_WORD - numBits)); unsigned Ones = countTrailingOnesSlowCase(); - return (numBits == Ones) && - ((Ones + countLeadingZerosSlowCase()) == BitWidth); - } - - /// \returns true if this APInt is a non-empty sequence of ones starting at - /// the least significant bit with the remainder zero. - /// Ex. isMask(0x0000FFFFU) == true. 
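// ----------------------------------------------------------------------------
// [Editor's note: illustrative sketch, not part of the imported patch.]
// The renamed predicates (isZero/isOne/isAllOnes) and the new
// isNegatedPowerOf2() in action; the values are made up for illustration.
// ----------------------------------------------------------------------------
#include "llvm/ADT/APInt.h"
#include <cassert>

void apintPredicateSketch() {
  llvm::APInt V(8, 0xF0);
  assert(!V.isZero() && !V.isOne() && !V.isAllOnes()); // renamed predicates
  assert(V.isNegatedPowerOf2()); // 0xF0 is -16 in 8 bits, and 16 is 2^4
  assert(V.isShiftedMask());     // 11110000: a contiguous run of ones
  assert(llvm::APInt(8, 0x0F).isMask()); // 00001111: ones starting at the LSB
}
// ----------------------------------------------------------------------------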
- bool isMask() const { - if (isSingleWord()) - return isMask_64(U.VAL); - unsigned Ones = countTrailingOnesSlowCase(); - return (Ones > 0) && ((Ones + countLeadingZerosSlowCase()) == BitWidth); - } - - /// Return true if this APInt value contains a sequence of ones with - /// the remainder zero. - bool isShiftedMask() const { - if (isSingleWord()) - return isShiftedMask_64(U.VAL); - unsigned Ones = countPopulationSlowCase(); - unsigned LeadZ = countLeadingZerosSlowCase(); - return (Ones + LeadZ + countTrailingZeros()) == BitWidth; - } - - /// @} - /// \name Value Generators - /// @{ - - /// Gets maximum unsigned value of APInt for specific bit width. - static APInt getMaxValue(unsigned numBits) { - return getAllOnesValue(numBits); - } - - /// Gets maximum signed value of APInt for a specific bit width. - static APInt getSignedMaxValue(unsigned numBits) { - APInt API = getAllOnesValue(numBits); - API.clearBit(numBits - 1); - return API; - } - - /// Gets minimum unsigned value of APInt for a specific bit width. - static APInt getMinValue(unsigned numBits) { return APInt(numBits, 0); } - - /// Gets minimum signed value of APInt for a specific bit width. - static APInt getSignedMinValue(unsigned numBits) { - APInt API(numBits, 0); - API.setBit(numBits - 1); - return API; - } - - /// Get the SignMask for a specific bit width. - /// - /// This is just a wrapper function of getSignedMinValue(), and it helps code - /// readability when we want to get a SignMask. - static APInt getSignMask(unsigned BitWidth) { - return getSignedMinValue(BitWidth); - } - - /// Get the all-ones value. - /// - /// \returns the all-ones value for an APInt of the specified bit-width. - static APInt getAllOnesValue(unsigned numBits) { - return APInt(numBits, WORDTYPE_MAX, true); - } - - /// Get the '0' value. - /// - /// \returns the '0' value for an APInt of the specified bit-width. - static APInt getNullValue(unsigned numBits) { return APInt(numBits, 0); } - - /// Compute an APInt containing numBits highbits from this APInt. - /// - /// Get an APInt with the same BitWidth as this APInt, just zero mask - /// the low bits and right shift to the least significant bit. - /// - /// \returns the high "numBits" bits of this APInt. - APInt getHiBits(unsigned numBits) const; - - /// Compute an APInt containing numBits lowbits from this APInt. - /// - /// Get an APInt with the same BitWidth as this APInt, just zero mask - /// the high bits. - /// - /// \returns the low "numBits" bits of this APInt. - APInt getLoBits(unsigned numBits) const; - - /// Return an APInt with exactly one bit set in the result. - static APInt getOneBitSet(unsigned numBits, unsigned BitNo) { - APInt Res(numBits, 0); - Res.setBit(BitNo); - return Res; - } - - /// Get a value with a block of bits set. - /// - /// Constructs an APInt value that has a contiguous range of bits set. The - /// bits from loBit (inclusive) to hiBit (exclusive) will be set. All other - /// bits will be zero. For example, with parameters(32, 0, 16) you would get - /// 0x0000FFFF. Please call getBitsSetWithWrap if \p loBit may be greater than - /// \p hiBit. - /// - /// \param numBits the intended bit width of the result - /// \param loBit the index of the lowest bit set. - /// \param hiBit the index of the highest bit set. - /// - /// \returns An APInt value with the requested bits set. 
- static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit) { - assert(loBit <= hiBit && "loBit greater than hiBit"); - APInt Res(numBits, 0); - Res.setBits(loBit, hiBit); - return Res; - } - - /// Wrap version of getBitsSet. - /// If \p hiBit is bigger than \p loBit, this is same with getBitsSet. - /// If \p hiBit is not bigger than \p loBit, the set bits "wrap". For example, - /// with parameters (32, 28, 4), you would get 0xF000000F. - /// If \p hiBit is equal to \p loBit, you would get a result with all bits - /// set. - static APInt getBitsSetWithWrap(unsigned numBits, unsigned loBit, - unsigned hiBit) { - APInt Res(numBits, 0); - Res.setBitsWithWrap(loBit, hiBit); - return Res; + return (numBits == Ones) && + ((Ones + countLeadingZerosSlowCase()) == BitWidth); } - /// Get a value with upper bits starting at loBit set. - /// - /// Constructs an APInt value that has a contiguous range of bits set. The - /// bits from loBit (inclusive) to numBits (exclusive) will be set. All other - /// bits will be zero. For example, with parameters(32, 12) you would get - /// 0xFFFFF000. - /// - /// \param numBits the intended bit width of the result - /// \param loBit the index of the lowest bit to set. - /// - /// \returns An APInt value with the requested bits set. - static APInt getBitsSetFrom(unsigned numBits, unsigned loBit) { - APInt Res(numBits, 0); - Res.setBitsFrom(loBit); - return Res; + /// \returns true if this APInt is a non-empty sequence of ones starting at + /// the least significant bit with the remainder zero. + /// Ex. isMask(0x0000FFFFU) == true. + bool isMask() const { + if (isSingleWord()) + return isMask_64(U.VAL); + unsigned Ones = countTrailingOnesSlowCase(); + return (Ones > 0) && ((Ones + countLeadingZerosSlowCase()) == BitWidth); } - /// Get a value with high bits set - /// - /// Constructs an APInt value that has the top hiBitsSet bits set. - /// - /// \param numBits the bitwidth of the result - /// \param hiBitsSet the number of high-order bits set in the result. - static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet) { - APInt Res(numBits, 0); - Res.setHighBits(hiBitsSet); - return Res; + /// Return true if this APInt value contains a sequence of ones with + /// the remainder zero. + bool isShiftedMask() const { + if (isSingleWord()) + return isShiftedMask_64(U.VAL); + unsigned Ones = countPopulationSlowCase(); + unsigned LeadZ = countLeadingZerosSlowCase(); + return (Ones + LeadZ + countTrailingZeros()) == BitWidth; } - /// Get a value with low bits set + /// Compute an APInt containing numBits highbits from this APInt. /// - /// Constructs an APInt value that has the bottom loBitsSet bits set. + /// Get an APInt with the same BitWidth as this APInt, just zero mask the low + /// bits and right shift to the least significant bit. /// - /// \param numBits the bitwidth of the result - /// \param loBitsSet the number of low-order bits set in the result. - static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet) { - APInt Res(numBits, 0); - Res.setLowBits(loBitsSet); - return Res; - } + /// \returns the high "numBits" bits of this APInt. + APInt getHiBits(unsigned numBits) const; - /// Return a value containing V broadcasted over NewLen bits. - static APInt getSplat(unsigned NewLen, const APInt &V); + /// Compute an APInt containing numBits lowbits from this APInt. + /// + /// Get an APInt with the same BitWidth as this APInt, just zero mask the high + /// bits. + /// + /// \returns the low "numBits" bits of this APInt. 
+ APInt getLoBits(unsigned numBits) const; /// Determine if two APInts have the same value, after zero-extending /// one of them (if needed!) to ensure that the bit-widths match. @@ -701,12 +540,10 @@ public: /// \name Unary Operators /// @{ - /// Postfix increment operator. - /// - /// Increments *this by 1. + /// Postfix increment operator. Increment *this by 1. /// /// \returns a new APInt value representing the original value of *this. - const APInt operator++(int) { + APInt operator++(int) { APInt API(*this); ++(*this); return API; @@ -717,12 +554,10 @@ public: /// \returns *this incremented by one APInt &operator++(); - /// Postfix decrement operator. - /// - /// Decrements *this by 1. + /// Postfix decrement operator. Decrement *this by 1. /// /// \returns a new APInt value representing the original value of *this. - const APInt operator--(int) { + APInt operator--(int) { APInt API(*this); --(*this); return API; @@ -733,16 +568,9 @@ public: /// \returns *this decremented by one. APInt &operator--(); - /// Logical negation operator. - /// - /// Performs logical negation operation on this APInt. - /// - /// \returns true if *this is zero, false otherwise. - bool operator!() const { - if (isSingleWord()) - return U.VAL == 0; - return countLeadingZerosSlowCase() == BitWidth; - } + /// Logical negation operation on this APInt returns true if zero, like normal + /// integers. + bool operator!() const { return isZero(); } /// @} /// \name Assignment Operators @@ -752,14 +580,15 @@ public: /// /// \returns *this after assignment of RHS. APInt &operator=(const APInt &RHS) { - // If the bitwidths are the same, we can avoid mucking with memory + // The common case (both source or dest being inline) doesn't require + // allocation or deallocation. if (isSingleWord() && RHS.isSingleWord()) { U.VAL = RHS.U.VAL; BitWidth = RHS.BitWidth; - return clearUnusedBits(); + return *this; } - AssignSlowCase(RHS); + assignSlowCase(RHS); return *this; } @@ -780,7 +609,6 @@ public: BitWidth = that.BitWidth; that.BitWidth = 0; - return *this; } @@ -812,7 +640,7 @@ public: if (isSingleWord()) U.VAL &= RHS.U.VAL; else - AndAssignSlowCase(RHS); + andAssignSlowCase(RHS); return *this; } @@ -827,7 +655,7 @@ public: return *this; } U.pVal[0] &= RHS; - memset(U.pVal+1, 0, (getNumWords() - 1) * APINT_WORD_SIZE); + memset(U.pVal + 1, 0, (getNumWords() - 1) * APINT_WORD_SIZE); return *this; } @@ -842,7 +670,7 @@ public: if (isSingleWord()) U.VAL |= RHS.U.VAL; else - OrAssignSlowCase(RHS); + orAssignSlowCase(RHS); return *this; } @@ -871,7 +699,7 @@ public: if (isSingleWord()) U.VAL ^= RHS.U.VAL; else - XorAssignSlowCase(RHS); + xorAssignSlowCase(RHS); return *this; } @@ -1057,6 +885,17 @@ public: /// Rotate right by rotateAmt. APInt rotr(const APInt &rotateAmt) const; + /// Concatenate the bits from "NewLSB" onto the bottom of *this. This is + /// equivalent to: + /// (this->zext(NewWidth) << NewLSB.getBitWidth()) | NewLSB.zext(NewWidth) + APInt concat(const APInt &NewLSB) const { + /// If the result will be small, then both the merged values are small. + unsigned NewWidth = getBitWidth() + NewLSB.getBitWidth(); + if (NewWidth <= APINT_BITS_PER_WORD) + return APInt(NewWidth, (U.VAL << NewLSB.getBitWidth()) | NewLSB.U.VAL); + return concatSlowCase(NewLSB); + } + /// Unsigned division operation. /// /// Perform an unsigned divide operation on this APInt by RHS. 
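// ----------------------------------------------------------------------------
// [Editor's note: illustrative sketch, not part of the imported patch.]
// The new concat() glues a second value onto the low end of *this, exactly as
// the equivalence in its comment describes; the values here are made up.
// ----------------------------------------------------------------------------
#include "llvm/ADT/APInt.h"
#include <cassert>

void apintConcatSketch() {
  llvm::APInt Hi(8, 0xAB);
  llvm::APInt Lo(4, 0xC);
  llvm::APInt Joined = Hi.concat(Lo); // (0xAB << 4) | 0xC, in 12 bits
  assert(Joined.getBitWidth() == 12);
  assert(Joined == llvm::APInt(12, 0xABC));
}
// ----------------------------------------------------------------------------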
Both this and @@ -1151,7 +990,7 @@ public: assert(BitWidth == RHS.BitWidth && "Comparison requires equal bit widths"); if (isSingleWord()) return U.VAL == RHS.U.VAL; - return EqualSlowCase(RHS); + return equalSlowCase(RHS); } /// Equality operator. @@ -1436,8 +1275,6 @@ public: clearUnusedBits(); } - /// Set a given bit to 1. - /// /// Set the given bit to 1 whose position is given as "bitPosition". void setBit(unsigned BitPosition) { assert(BitPosition < BitWidth && "BitPosition out of range"); @@ -1449,9 +1286,7 @@ public: } /// Set the sign bit to 1. - void setSignBit() { - setBit(BitWidth - 1); - } + void setSignBit() { setBit(BitWidth - 1); } /// Set a given bit to a given value. void setBitVal(unsigned BitPosition, bool BitValue) { @@ -1497,14 +1332,10 @@ public: } /// Set the top bits starting from loBit. - void setBitsFrom(unsigned loBit) { - return setBits(loBit, BitWidth); - } + void setBitsFrom(unsigned loBit) { return setBits(loBit, BitWidth); } /// Set the bottom loBits bits. - void setLowBits(unsigned loBits) { - return setBits(0, loBits); - } + void setLowBits(unsigned loBits) { return setBits(0, loBits); } /// Set the top hiBits bits. void setHighBits(unsigned hiBits) { @@ -1539,9 +1370,7 @@ public: } /// Set the sign bit to 0. - void clearSignBit() { - clearBit(BitWidth - 1); - } + void clearSignBit() { clearBit(BitWidth - 1); } /// Toggle every bit to its opposite value. void flipAllBits() { @@ -1629,8 +1458,10 @@ public: /// uint64_t. The bitwidth must be <= 64 or the value must fit within a /// uint64_t. Otherwise an assertion will result. uint64_t getZExtValue() const { - if (isSingleWord()) + if (isSingleWord()) { + assert(BitWidth && "zero width values not allowed"); return U.VAL; + } assert(getActiveBits() <= 64 && "Too many bits for uint64_t"); return U.pVal[0]; } @@ -1678,8 +1509,11 @@ public: /// \returns 0 if the high order bit is not set, otherwise returns the number /// of 1 bits from the most significant to the least unsigned countLeadingOnes() const { - if (isSingleWord()) + if (isSingleWord()) { + if (LLVM_UNLIKELY(BitWidth == 0)) + return 0; return llvm::countLeadingOnes(U.VAL << (APINT_BITS_PER_WORD - BitWidth)); + } return countLeadingOnesSlowCase(); } @@ -1774,9 +1608,7 @@ public: /// The conversion does not do a translation from integer to double, it just /// re-interprets the bits as a double. Note that it is valid to do this on /// any bit width. Exactly 64 bits will be translated. - double bitsToDouble() const { - return BitsToDouble(getWord(0)); - } + double bitsToDouble() const { return BitsToDouble(getWord(0)); } /// Converts APInt bits to a float /// @@ -1808,7 +1640,7 @@ public: /// @{ /// \returns the floor log base 2 of this APInt. - unsigned logBase2() const { return getActiveBits() - 1; } + unsigned logBase2() const { return getActiveBits() - 1; } /// \returns the ceil log base 2 of this APInt. unsigned ceilLogBase2() const { @@ -1826,25 +1658,7 @@ public: /// /// to get around any mathematical concerns resulting from /// referencing 2 in a space where 2 does no exist. - unsigned nearestLogBase2() const { - // Special case when we have a bitwidth of 1. If VAL is 1, then we - // get 0. If VAL is 0, we get WORDTYPE_MAX which gets truncated to - // UINT32_MAX. - if (BitWidth == 1) - return U.VAL - 1; - - // Handle the zero case. - if (isNullValue()) - return UINT32_MAX; - - // The non-zero case is handled by computing: - // - // nearestLogBase2(x) = logBase2(x) + x[logBase2(x)-1]. 
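// ----------------------------------------------------------------------------
// [Editor's note: worked example, not part of the imported patch.]
// Applying the nearestLogBase2 rounding rule quoted above to two small values:
//   x = 7 (0b111): logBase2(7) = 2 and bit 1 of x is 1, so the result is 3
//                  (7 is nearer to 2^3 = 8 than to 2^2 = 4).
//   x = 5 (0b101): logBase2(5) = 2 and bit 1 of x is 0, so the result is 2
//                  (5 is nearer to 2^2 = 4 than to 2^3 = 8).
// ----------------------------------------------------------------------------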
- // - // where x[i] is referring to the value of the ith bit of x. - unsigned lg = logBase2(); - return lg + unsigned((*this)[lg - 1]); - } + unsigned nearestLogBase2() const; /// \returns the log base 2 of this APInt if its an exact power of two, -1 /// otherwise @@ -1854,12 +1668,12 @@ public: return logBase2(); } - /// Compute the square root + /// Compute the square root. APInt sqrt() const; - /// Get the absolute value; - /// - /// If *this is < 0 then return -(*this), otherwise *this; + /// Get the absolute value. If *this is < 0 then return -(*this), otherwise + /// *this. Note that the "most negative" signed number (e.g. -128 for 8 bit + /// wide APInt) is unchanged due to how negation works. APInt abs() const { if (isNegative()) return -(*this); @@ -1869,18 +1683,6 @@ public: /// \returns the multiplicative inverse for a given modulo. APInt multiplicativeInverse(const APInt &modulo) const; - /// @} - /// \name Support for division by constant - /// @{ - - /// Calculate the magic number for signed division by a constant. - struct ms; - ms magic() const; - - /// Calculate the magic number for unsigned division by a constant. - struct mu; - mu magicu(unsigned LeadingZeros = 0) const; - /// @} /// \name Building-block Operations for APInt and APFloat /// @{ @@ -1908,9 +1710,8 @@ public: /// DST, of dstCOUNT parts, such that the bit srcLSB becomes the least /// significant bit of DST. All high bits above srcBITS in DST are /// zero-filled. - static void tcExtract(WordType *, unsigned dstCount, - const WordType *, unsigned srcBits, - unsigned srcLSB); + static void tcExtract(WordType *, unsigned dstCount, const WordType *, + unsigned srcBits, unsigned srcLSB); /// Set the given bit of a bignum. Zero-based. static void tcSetBit(WordType *, unsigned bit); @@ -1927,14 +1728,13 @@ public: static void tcNegate(WordType *, unsigned); /// DST += RHS + CARRY where CARRY is zero or one. Returns the carry flag. - static WordType tcAdd(WordType *, const WordType *, - WordType carry, unsigned); + static WordType tcAdd(WordType *, const WordType *, WordType carry, unsigned); /// DST += RHS. Returns the carry flag. static WordType tcAddPart(WordType *, WordType, unsigned); /// DST -= RHS + CARRY where CARRY is zero or one. Returns the carry flag. - static WordType tcSubtract(WordType *, const WordType *, - WordType carry, unsigned); + static WordType tcSubtract(WordType *, const WordType *, WordType carry, + unsigned); /// DST -= RHS. Returns the carry flag. static WordType tcSubtractPart(WordType *, WordType, unsigned); @@ -1950,8 +1750,7 @@ public: /// otherwise overflow occurred and return one. static int tcMultiplyPart(WordType *dst, const WordType *src, WordType multiplier, WordType carry, - unsigned srcParts, unsigned dstParts, - bool add); + unsigned srcParts, unsigned dstParts, bool add); /// DST = LHS * RHS, where DST has the same width as the operands and is /// filled with the least significant parts of the result. Returns one if @@ -1962,8 +1761,8 @@ public: /// DST = LHS * RHS, where DST has width the sum of the widths of the /// operands. No overflow occurs. DST must be disjoint from both operands. - static void tcFullMultiply(WordType *, const WordType *, - const WordType *, unsigned, unsigned); + static void tcFullMultiply(WordType *, const WordType *, const WordType *, + unsigned, unsigned); /// If RHS is zero LHS and REMAINDER are left unchanged, return one. 
/// Otherwise set LHS to LHS / RHS with the fractional part discarded, set @@ -1974,9 +1773,8 @@ public: /// SCRATCH is a bignum of the same size as the operands and result for use by /// the routine; its contents need not be initialized and are destroyed. LHS, /// REMAINDER and SCRATCH must be distinct. - static int tcDivide(WordType *lhs, const WordType *rhs, - WordType *remainder, WordType *scratch, - unsigned parts); + static int tcDivide(WordType *lhs, const WordType *rhs, WordType *remainder, + WordType *scratch, unsigned parts); /// Shift a bignum left Count bits. Shifted in bits are zero. There are no /// restrictions on Count. @@ -1986,12 +1784,6 @@ public: /// restrictions on Count. static void tcShiftRight(WordType *, unsigned Words, unsigned Count); - /// The obvious AND, OR and XOR and complement operations. - static void tcAnd(WordType *, const WordType *, unsigned); - static void tcOr(WordType *, const WordType *, unsigned); - static void tcXor(WordType *, const WordType *, unsigned); - static void tcComplement(WordType *, unsigned); - /// Comparison (unsigned) of two bignums. static int tcCompare(const WordType *, const WordType *, unsigned); @@ -2005,26 +1797,185 @@ public: return tcSubtractPart(dst, 1, parts); } - /// Set the least significant BITS and clear the rest. - static void tcSetLeastSignificantBits(WordType *, unsigned, unsigned bits); + /// Used to insert APInt objects, or objects that contain APInt objects, into + /// FoldingSets. + void Profile(FoldingSetNodeID &id) const; /// debug method void dump() const; - /// @} -}; + /// Returns whether this instance allocated memory. + bool needsCleanup() const { return !isSingleWord(); } -/// Magic data for optimising signed division by a constant. -struct APInt::ms { - APInt m; ///< magic number - unsigned s; ///< shift amount -}; +private: + /// This union is used to store the integer value. When the + /// integer bit-width <= 64, it uses VAL, otherwise it uses pVal. + union { + uint64_t VAL; ///< Used to store the <= 64 bits integer value. + uint64_t *pVal; ///< Used to store the >64 bits integer value. + } U; + + unsigned BitWidth; ///< The number of bits in this APInt. + + friend struct DenseMapInfo; + friend class APSInt; + + /// This constructor is used only internally for speed of construction of + /// temporaries. It is unsafe since it takes ownership of the pointer, so it + /// is not public. + APInt(uint64_t *val, unsigned bits) : BitWidth(bits) { U.pVal = val; } + + /// Determine which word a bit is in. + /// + /// \returns the word position for the specified bit position. + static unsigned whichWord(unsigned bitPosition) { + return bitPosition / APINT_BITS_PER_WORD; + } + + /// Determine which bit in a word the specified bit position is in. + static unsigned whichBit(unsigned bitPosition) { + return bitPosition % APINT_BITS_PER_WORD; + } + + /// Get a single bit mask. + /// + /// \returns a uint64_t with only bit at "whichBit(bitPosition)" set + /// This method generates and returns a uint64_t (word) mask for a single + /// bit at a specific bit position. This is used to mask the bit in the + /// corresponding word. + static uint64_t maskBit(unsigned bitPosition) { + return 1ULL << whichBit(bitPosition); + } + + /// Clear unused high order bits + /// + /// This method is used internally to clear the top "N" bits in the high order + /// word that are not used by the APInt. This is needed after the most + /// significant word is assigned a value to ensure that those bits are + /// zero'd out. 
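// ----------------------------------------------------------------------------
// [Editor's note: worked example, not part of the imported patch.]
// For a single-word value with BitWidth == 10, the computation below gives
// WordBits = ((10 - 1) % 64) + 1 = 10 and mask = WORDTYPE_MAX >> (64 - 10),
// i.e. only the low ten bits stay set, so every bit above bit 9 is cleared.
// The BitWidth == 0 special case forces the mask to 0, clearing the word
// entirely.
// ----------------------------------------------------------------------------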
+ APInt &clearUnusedBits() { + // Compute how many bits are used in the final word. + unsigned WordBits = ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1; + + // Mask out the high bits. + uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - WordBits); + if (LLVM_UNLIKELY(BitWidth == 0)) + mask = 0; + + if (isSingleWord()) + U.VAL &= mask; + else + U.pVal[getNumWords() - 1] &= mask; + return *this; + } + + /// Get the word corresponding to a bit position + /// \returns the corresponding word for the specified bit position. + uint64_t getWord(unsigned bitPosition) const { + return isSingleWord() ? U.VAL : U.pVal[whichWord(bitPosition)]; + } + + /// Utility method to change the bit width of this APInt to new bit width, + /// allocating and/or deallocating as necessary. There is no guarantee on the + /// value of any bits upon return. Caller should populate the bits after. + void reallocate(unsigned NewBitWidth); + + /// Convert a char array into an APInt + /// + /// \param radix 2, 8, 10, 16, or 36 + /// Converts a string into a number. The string must be non-empty + /// and well-formed as a number of the given base. The bit-width + /// must be sufficient to hold the result. + /// + /// This is used by the constructors that take string arguments. + /// + /// StringRef::getAsInteger is superficially similar but (1) does + /// not assume that the string is well-formed and (2) grows the + /// result to hold the input. + void fromString(unsigned numBits, StringRef str, uint8_t radix); + + /// An internal division function for dividing APInts. + /// + /// This is used by the toString method to divide by the radix. It simply + /// provides a more convenient form of divide for internal use since KnuthDiv + /// has specific constraints on its inputs. If those constraints are not met + /// then it provides a simpler form of divide. + static void divide(const WordType *LHS, unsigned lhsWords, + const WordType *RHS, unsigned rhsWords, WordType *Quotient, + WordType *Remainder); + + /// out-of-line slow case for inline constructor + void initSlowCase(uint64_t val, bool isSigned); + + /// shared code between two array constructors + void initFromArray(ArrayRef array); + + /// out-of-line slow case for inline copy constructor + void initSlowCase(const APInt &that); + + /// out-of-line slow case for shl + void shlSlowCase(unsigned ShiftAmt); + + /// out-of-line slow case for lshr. + void lshrSlowCase(unsigned ShiftAmt); + + /// out-of-line slow case for ashr. + void ashrSlowCase(unsigned ShiftAmt); + + /// out-of-line slow case for operator= + void assignSlowCase(const APInt &RHS); + + /// out-of-line slow case for operator== + bool equalSlowCase(const APInt &RHS) const LLVM_READONLY; + + /// out-of-line slow case for countLeadingZeros + unsigned countLeadingZerosSlowCase() const LLVM_READONLY; + + /// out-of-line slow case for countLeadingOnes. + unsigned countLeadingOnesSlowCase() const LLVM_READONLY; + + /// out-of-line slow case for countTrailingZeros. + unsigned countTrailingZerosSlowCase() const LLVM_READONLY; + + /// out-of-line slow case for countTrailingOnes + unsigned countTrailingOnesSlowCase() const LLVM_READONLY; + + /// out-of-line slow case for countPopulation + unsigned countPopulationSlowCase() const LLVM_READONLY; + + /// out-of-line slow case for intersects. + bool intersectsSlowCase(const APInt &RHS) const LLVM_READONLY; + + /// out-of-line slow case for isSubsetOf. + bool isSubsetOfSlowCase(const APInt &RHS) const LLVM_READONLY; + + /// out-of-line slow case for setBits. 
+ void setBitsSlowCase(unsigned loBit, unsigned hiBit); + + /// out-of-line slow case for flipAllBits. + void flipAllBitsSlowCase(); + + /// out-of-line slow case for concat. + APInt concatSlowCase(const APInt &NewLSB) const; + + /// out-of-line slow case for operator&=. + void andAssignSlowCase(const APInt &RHS); -/// Magic data for optimising unsigned division by a constant. -struct APInt::mu { - APInt m; ///< magic number - bool a; ///< add indicator - unsigned s; ///< shift amount + /// out-of-line slow case for operator|=. + void orAssignSlowCase(const APInt &RHS); + + /// out-of-line slow case for operator^=. + void xorAssignSlowCase(const APInt &RHS); + + /// Unsigned comparison. Returns -1, 0, or 1 if this APInt is less than, equal + /// to, or greater than RHS. + int compare(const APInt &RHS) const LLVM_READONLY; + + /// Signed comparison. Returns -1, 0, or 1 if this APInt is less than, equal + /// to, or greater than RHS. + int compareSigned(const APInt &RHS) const LLVM_READONLY; + + /// @} }; inline bool operator==(uint64_t V1, const APInt &V2) { return V2 == V1; } @@ -2161,7 +2112,6 @@ inline APInt operator*(uint64_t LHS, APInt b) { return b; } - namespace APIntOps { /// Determine the smaller of two APInts considered to be signed. @@ -2277,7 +2227,16 @@ Optional SolveQuadraticEquationWrap(APInt A, APInt B, APInt C, Optional GetMostSignificantDifferentBit(const APInt &A, const APInt &B); -} // End of APIntOps namespace +/// Splat/Merge neighboring bits to widen/narrow the bitmask represented +/// by \param A to \param NewBitWidth bits. +/// +/// e.g. ScaleBitMask(0b0101, 8) -> 0b00110011 +/// e.g. ScaleBitMask(0b00011011, 4) -> 0b0111 +/// A.getBitwidth() or NewBitWidth must be a whole multiples of the other. +/// +/// TODO: Do we need a mode where all bits must be set when merging down? +APInt ScaleBitMask(const APInt &A, unsigned NewBitWidth); +} // namespace APIntOps // See friend declaration above. This additional declaration is required in // order to compile LLVM with IBM xlC compiler. @@ -2292,7 +2251,7 @@ void StoreIntToMemory(const APInt &IntVal, uint8_t *Dst, unsigned StoreBytes); void LoadIntFromMemory(APInt &IntVal, const uint8_t *Src, unsigned LoadBytes); /// Provide DenseMapInfo for APInt. -template <> struct DenseMapInfo { +template <> struct DenseMapInfo { static inline APInt getEmptyKey() { APInt V(nullptr, 0); V.U.VAL = 0; diff --git a/llvm/include/llvm/ADT/APSInt.h b/llvm/include/llvm/ADT/APSInt.h index 1509d472f131..c1cf3c546070 100644 --- a/llvm/include/llvm/ADT/APSInt.h +++ b/llvm/include/llvm/ADT/APSInt.h @@ -58,7 +58,7 @@ public: /// that 0 is not a positive value. /// /// \returns true if this APSInt is positive. - bool isStrictlyPositive() const { return isNonNegative() && !isNullValue(); } + bool isStrictlyPositive() const { return isNonNegative() && !isZero(); } APSInt &operator=(APInt RHS) { // Retain our current sign. @@ -344,17 +344,17 @@ inline raw_ostream &operator<<(raw_ostream &OS, const APSInt &I) { } /// Provide DenseMapInfo for APSInt, using the DenseMapInfo for APInt. 
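// ----------------------------------------------------------------------------
// [Editor's note: illustrative sketch, not part of the imported patch.]
// Usage of the new llvm::APIntOps::ScaleBitMask() declared earlier in this
// hunk, reproducing the two examples from its documentation; as documented,
// merging down keeps a result bit set if any of its source bits were set.
// ----------------------------------------------------------------------------
#include "llvm/ADT/APInt.h"
#include <cassert>

void scaleBitMaskSketch() {
  llvm::APInt Narrow(4, 0b0101);
  assert(llvm::APIntOps::ScaleBitMask(Narrow, 8) == llvm::APInt(8, 0b00110011));
  llvm::APInt Wide(8, 0b00011011);
  assert(llvm::APIntOps::ScaleBitMask(Wide, 4) == llvm::APInt(4, 0b0111));
}
// ----------------------------------------------------------------------------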
-template <> struct DenseMapInfo { +template <> struct DenseMapInfo { static inline APSInt getEmptyKey() { - return APSInt(DenseMapInfo::getEmptyKey()); + return APSInt(DenseMapInfo::getEmptyKey()); } static inline APSInt getTombstoneKey() { - return APSInt(DenseMapInfo::getTombstoneKey()); + return APSInt(DenseMapInfo::getTombstoneKey()); } static unsigned getHashValue(const APSInt &Key) { - return DenseMapInfo::getHashValue(Key); + return DenseMapInfo::getHashValue(Key); } static bool isEqual(const APSInt &LHS, const APSInt &RHS) { diff --git a/llvm/include/llvm/ADT/ArrayRef.h b/llvm/include/llvm/ADT/ArrayRef.h index 2df49223c987..61f85cfc812b 100644 --- a/llvm/include/llvm/ADT/ArrayRef.h +++ b/llvm/include/llvm/ADT/ArrayRef.h @@ -26,8 +26,6 @@ namespace llvm { - template struct DenseMapInfo; - /// ArrayRef - Represent a constant reference to an array (0 or more elements /// consecutively in memory), i.e. a start pointer and a length. It allows /// various APIs to take consecutive elements easily and conveniently. @@ -572,7 +570,7 @@ namespace llvm { } // Provide DenseMapInfo for ArrayRefs. - template struct DenseMapInfo> { + template struct DenseMapInfo, void> { static inline ArrayRef getEmptyKey() { return ArrayRef( reinterpret_cast(~static_cast(0)), size_t(0)); diff --git a/llvm/include/llvm/ADT/BitVector.h b/llvm/include/llvm/ADT/BitVector.h index 31d388073633..cd1964cbdd98 100644 --- a/llvm/include/llvm/ADT/BitVector.h +++ b/llvm/include/llvm/ADT/BitVector.h @@ -85,7 +85,7 @@ class BitVector { unsigned Size; // Size of bitvector in bits. public: - typedef unsigned size_type; + using size_type = unsigned; // Encapsulation of a single bit. class reference { @@ -536,8 +536,8 @@ public: [&Arg](auto const &BV) { return Arg.size() == BV; }) && "consistent sizes"); Out.resize(Arg.size()); - for (size_t i = 0, e = Arg.Bits.size(); i != e; ++i) - Out.Bits[i] = f(Arg.Bits[i], Args.Bits[i]...); + for (size_type I = 0, E = Arg.Bits.size(); I != E; ++I) + Out.Bits[I] = f(Arg.Bits[I], Args.Bits[I]...); Out.clear_unused_bits(); return Out; } @@ -545,16 +545,16 @@ public: BitVector &operator|=(const BitVector &RHS) { if (size() < RHS.size()) resize(RHS.size()); - for (size_t i = 0, e = RHS.Bits.size(); i != e; ++i) - Bits[i] |= RHS.Bits[i]; + for (size_type I = 0, E = RHS.Bits.size(); I != E; ++I) + Bits[I] |= RHS.Bits[I]; return *this; } BitVector &operator^=(const BitVector &RHS) { if (size() < RHS.size()) resize(RHS.size()); - for (size_t i = 0, e = RHS.Bits.size(); i != e; ++i) - Bits[i] ^= RHS.Bits[i]; + for (size_type I = 0, E = RHS.Bits.size(); I != E; ++I) + Bits[I] ^= RHS.Bits[I]; return *this; } @@ -808,11 +808,11 @@ private: public: /// Return the size (in bytes) of the bit vector. 
- size_t getMemorySize() const { return Bits.size() * sizeof(BitWord); } - size_t getBitCapacity() const { return Bits.size() * BITWORD_SIZE; } + size_type getMemorySize() const { return Bits.size() * sizeof(BitWord); } + size_type getBitCapacity() const { return Bits.size() * BITWORD_SIZE; } }; -inline size_t capacity_in_bytes(const BitVector &X) { +inline BitVector::size_type capacity_in_bytes(const BitVector &X) { return X.getMemorySize(); } @@ -824,8 +824,8 @@ template <> struct DenseMapInfo { return V; } static unsigned getHashValue(const BitVector &V) { - return DenseMapInfo>>::getHashValue( - std::make_pair(V.size(), V.getData())); + return DenseMapInfo>>:: + getHashValue(std::make_pair(V.size(), V.getData())); } static bool isEqual(const BitVector &LHS, const BitVector &RHS) { if (LHS.isInvalid() || RHS.isInvalid()) diff --git a/llvm/include/llvm/ADT/CombinationGenerator.h b/llvm/include/llvm/ADT/CombinationGenerator.h new file mode 100644 index 000000000000..ab6afd555726 --- /dev/null +++ b/llvm/include/llvm/ADT/CombinationGenerator.h @@ -0,0 +1,148 @@ +//===-- llvm/ADT/CombinationGenerator.h ------------------------*- C++ -*--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Combination generator. +/// +/// Example: given input {{0, 1}, {2}, {3, 4}} it will produce the following +/// combinations: {0, 2, 3}, {0, 2, 4}, {1, 2, 3}, {1, 2, 4}. +/// +/// It is useful to think of input as vector-of-vectors, where the +/// outer vector is the variable space, and inner vector is choice space. +/// The number of choices for each variable can be different. +/// +/// As for implementation, it is useful to think of this as a weird number, +/// where each digit (==variable) may have different base (==number of choices). +/// Thus modelling of 'produce next combination' is exactly analogous to the +/// incrementing of an number - increment lowest digit (pick next choice for the +/// variable), and if it wrapped to the beginning then increment next digit. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ADT_COMBINATIONGENERATOR_H +#define LLVM_ADT_COMBINATIONGENERATOR_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include +#include + +namespace llvm { + +template +class CombinationGenerator { + template struct WrappingIterator { + using value_type = T; + + const ArrayRef Range; + typename decltype(Range)::const_iterator Position; + + // Rewind the tape, placing the position to again point at the beginning. + void rewind() { Position = Range.begin(); } + + // Advance position forward, possibly wrapping to the beginning. + // Returns whether the wrap happened. + bool advance() { + ++Position; + bool Wrapped = Position == Range.end(); + if (Wrapped) + rewind(); + return Wrapped; + } + + // Get the value at which we are currently pointing. 
+ const value_type &operator*() const { return *Position; } + + WrappingIterator(ArrayRef Range_) : Range(Range_) { + assert(!Range.empty() && "The range must not be empty."); + rewind(); + } + }; + + const ArrayRef VariablesChoices; + + void performGeneration( + const function_ref)> Callback) const { + SmallVector, variable_smallsize> + VariablesState; + + // 'increment' of the the whole VariablesState is defined identically to the + // increment of a number: starting from the least significant element, + // increment it, and if it wrapped, then propagate that carry by also + // incrementing next (more significant) element. + auto IncrementState = + [](MutableArrayRef> VariablesState) + -> bool { + for (WrappingIterator &Variable : + llvm::reverse(VariablesState)) { + bool Wrapped = Variable.advance(); + if (!Wrapped) + return false; // There you go, next combination is ready. + // We have carry - increment more significant variable next.. + } + return true; // MSB variable wrapped, no more unique combinations. + }; + + // Initialize the per-variable state to refer to the possible choices for + // that variable. + VariablesState.reserve(VariablesChoices.size()); + for (ArrayRef VC : VariablesChoices) + VariablesState.emplace_back(VC); + + // Temporary buffer to store each combination before performing Callback. + SmallVector CurrentCombination; + CurrentCombination.resize(VariablesState.size()); + + while (true) { + // Gather the currently-selected variable choices into a vector. + for (auto I : llvm::zip(VariablesState, CurrentCombination)) + std::get<1>(I) = *std::get<0>(I); + // And pass the new combination into callback, as intended. + if (/*Abort=*/Callback(CurrentCombination)) + return; + // And tick the state to next combination, which will be unique. + if (IncrementState(VariablesState)) + return; // All combinations produced. + } + }; + +public: + CombinationGenerator(ArrayRef VariablesChoices_) + : VariablesChoices(VariablesChoices_) { +#ifndef NDEBUG + assert(!VariablesChoices.empty() && "There should be some variables."); + llvm::for_each(VariablesChoices, [](ArrayRef VariableChoices) { + assert(!VariableChoices.empty() && + "There must always be some choice, at least a placeholder one."); + }); +#endif + } + + // How many combinations can we produce, max? + // This is at most how many times the callback will be called. + size_t numCombinations() const { + size_t NumVariants = 1; + for (ArrayRef VariableChoices : VariablesChoices) + NumVariants *= VariableChoices.size(); + assert(NumVariants >= 1 && + "We should always end up producing at least one combination"); + return NumVariants; + } + + // Actually perform exhaustive combination generation. + // Each result will be passed into the callback. + void generate(const function_ref)> Callback) { + performGeneration(Callback); + } +}; + +} // namespace llvm + +#endif diff --git a/llvm/include/llvm/ADT/DenseMapInfo.h b/llvm/include/llvm/ADT/DenseMapInfo.h index d276acbfa6a6..75b7371a3683 100644 --- a/llvm/include/llvm/ADT/DenseMapInfo.h +++ b/llvm/include/llvm/ADT/DenseMapInfo.h @@ -13,10 +13,10 @@ #ifndef LLVM_ADT_DENSEMAPINFO_H #define LLVM_ADT_DENSEMAPINFO_H -#include "llvm/ADT/Hashing.h" #include #include #include +#include #include namespace llvm { @@ -39,7 +39,12 @@ static inline unsigned combineHashValue(unsigned a, unsigned b) { } // end namespace detail -template +/// An information struct used to provide DenseMap with the various necessary +/// components for a given value type `T`. 
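// ----------------------------------------------------------------------------
// [Editor's note: illustrative sketch, not part of the imported patch.]
// Minimal use of the CombinationGenerator added above: two variables, the
// first with choices {0, 1} and the second with {3, 4}, visited as {0,3},
// {0,4}, {1,3}, {1,4}. Names and inline sizes are arbitrary.
// ----------------------------------------------------------------------------
#include "llvm/ADT/CombinationGenerator.h"
#include "llvm/ADT/SmallVector.h"
#include <cassert>

void combinationGeneratorSketch() {
  const llvm::SmallVector<llvm::SmallVector<int, 2>, 2> Choices = {{0, 1},
                                                                   {3, 4}};
  llvm::CombinationGenerator<int, llvm::SmallVector<int, 2>, 4> G(Choices);
  assert(G.numCombinations() == 4);

  unsigned Seen = 0;
  G.generate([&Seen](llvm::ArrayRef<int> Combination) {
    (void)Combination; // holds one choice per variable
    ++Seen;
    return false; // returning true would stop the generation early
  });
  assert(Seen == 4);
}
// ----------------------------------------------------------------------------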
`Enable` is an optional additional +/// parameter that is used to support SFINAE (generally using std::enable_if_t) +/// in derived DenseMapInfo specializations; in non-SFINAE use cases this should +/// just be `void`. +template struct DenseMapInfo { //static inline T getEmptyKey(); //static inline T getTombstoneKey(); @@ -282,13 +287,6 @@ template struct DenseMapInfo> { } }; -template <> struct DenseMapInfo { - static inline hash_code getEmptyKey() { return hash_code(-1); } - static inline hash_code getTombstoneKey() { return hash_code(-2); } - static unsigned getHashValue(hash_code val) { return val; } - static bool isEqual(hash_code LHS, hash_code RHS) { return LHS == RHS; } -}; - } // end namespace llvm #endif // LLVM_ADT_DENSEMAPINFO_H diff --git a/llvm/include/llvm/ADT/EquivalenceClasses.h b/llvm/include/llvm/ADT/EquivalenceClasses.h index 273b00f99d5d..de6bb3bca7e3 100644 --- a/llvm/include/llvm/ADT/EquivalenceClasses.h +++ b/llvm/include/llvm/ADT/EquivalenceClasses.h @@ -30,7 +30,8 @@ namespace llvm { /// /// This implementation is an efficient implementation that only stores one copy /// of the element being indexed per entry in the set, and allows any arbitrary -/// type to be indexed (as long as it can be ordered with operator<). +/// type to be indexed (as long as it can be ordered with operator< or a +/// comparator is provided). /// /// Here is a simple example using integers: /// @@ -54,7 +55,7 @@ namespace llvm { /// 4 /// 5 1 2 /// -template +template > class EquivalenceClasses { /// ECValue - The EquivalenceClasses data structure is just a set of these. /// Each of these represents a relation for a value. First it stores the @@ -101,22 +102,40 @@ class EquivalenceClasses { assert(RHS.isLeader() && RHS.getNext() == nullptr && "Not a singleton!"); } - bool operator<(const ECValue &UFN) const { return Data < UFN.Data; } - bool isLeader() const { return (intptr_t)Next & 1; } const ElemTy &getData() const { return Data; } const ECValue *getNext() const { return (ECValue*)((intptr_t)Next & ~(intptr_t)1); } + }; + + /// A wrapper of the comparator, to be passed to the set. + struct ECValueComparator { + using is_transparent = void; + + ECValueComparator() : compare(Compare()) {} + + bool operator()(const ECValue &lhs, const ECValue &rhs) const { + return compare(lhs.Data, rhs.Data); + } + + template + bool operator()(const T &lhs, const ECValue &rhs) const { + return compare(lhs, rhs.Data); + } + + template + bool operator()(const ECValue &lhs, const T &rhs) const { + return compare(lhs.Data, rhs); + } - template - bool operator<(const T &Val) const { return Data < Val; } + const Compare compare; }; /// TheMapping - This implicitly provides a mapping from ElemTy values to the /// ECValues, it just keeps the key as part of the value. 
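// ----------------------------------------------------------------------------
// [Editor's note: illustrative sketch, not part of the imported patch.]
// The EquivalenceClasses change above adds a Compare template parameter
// (defaulting to std::less<ElemTy>, so existing users are unaffected). A
// hypothetical instantiation with a custom ordering:
// ----------------------------------------------------------------------------
#include "llvm/ADT/EquivalenceClasses.h"
#include <cassert>
#include <functional>

void equivalenceClassesSketch() {
  llvm::EquivalenceClasses<int, std::greater<int>> EC;
  EC.unionSets(1, 2);
  EC.unionSets(2, 3); // 1, 2 and 3 now share one leader
  EC.insert(4);       // 4 stays in its own singleton class
  assert(EC.findLeader(1) == EC.findLeader(3));
  assert(EC.findLeader(1) != EC.findLeader(4));
}
// ----------------------------------------------------------------------------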
- std::set TheMapping; + std::set TheMapping; public: EquivalenceClasses() = default; diff --git a/llvm/include/llvm/ADT/FunctionExtras.h b/llvm/include/llvm/ADT/FunctionExtras.h index e67ef7377c88..5a37417ddde5 100644 --- a/llvm/include/llvm/ADT/FunctionExtras.h +++ b/llvm/include/llvm/ADT/FunctionExtras.h @@ -37,6 +37,7 @@ #include "llvm/ADT/STLForwardCompat.h" #include "llvm/Support/MemAlloc.h" #include "llvm/Support/type_traits.h" +#include #include #include @@ -64,11 +65,16 @@ template using EnableUnlessSameType = std::enable_if_t, ThisT>::value>; template -using EnableIfCallable = - std::enable_if_t::value || - std::is_convertible()( - std::declval()...)), - Ret>::value>; +using EnableIfCallable = std::enable_if_t, + std::is_same()(std::declval()...)), + Ret>, + std::is_same()( + std::declval()...)), + Ret>, + std::is_convertible()( + std::declval()...)), + Ret>>::value>; template class UniqueFunctionBase { protected: diff --git a/llvm/include/llvm/ADT/Hashing.h b/llvm/include/llvm/ADT/Hashing.h index e296c1c53ebd..74a87a3d8dbb 100644 --- a/llvm/include/llvm/ADT/Hashing.h +++ b/llvm/include/llvm/ADT/Hashing.h @@ -56,6 +56,7 @@ #include namespace llvm { +template struct DenseMapInfo; /// An opaque object representing a hash code. /// @@ -677,6 +678,13 @@ hash_code hash_value(const std::basic_string &arg) { return hash_combine_range(arg.begin(), arg.end()); } +template <> struct DenseMapInfo { + static inline hash_code getEmptyKey() { return hash_code(-1); } + static inline hash_code getTombstoneKey() { return hash_code(-2); } + static unsigned getHashValue(hash_code val) { return val; } + static bool isEqual(hash_code LHS, hash_code RHS) { return LHS == RHS; } +}; + } // namespace llvm #endif diff --git a/llvm/include/llvm/ADT/ImmutableList.h b/llvm/include/llvm/ADT/ImmutableList.h index c9ee494734e7..cf27c5a16d28 100644 --- a/llvm/include/llvm/ADT/ImmutableList.h +++ b/llvm/include/llvm/ADT/ImmutableList.h @@ -220,8 +220,7 @@ public: // Partially-specialized Traits. //===----------------------------------------------------------------------===// -template struct DenseMapInfo; -template struct DenseMapInfo> { +template struct DenseMapInfo, void> { static inline ImmutableList getEmptyKey() { return reinterpret_cast*>(-1); } diff --git a/llvm/include/llvm/ADT/IntervalMap.h b/llvm/include/llvm/ADT/IntervalMap.h index 26a7ed0cd333..3c107a3622a9 100644 --- a/llvm/include/llvm/ADT/IntervalMap.h +++ b/llvm/include/llvm/ADT/IntervalMap.h @@ -1137,7 +1137,7 @@ public: /// overlaps(a, b) - Return true if the intervals in this map overlap with the /// interval [a;b]. 
- bool overlaps(KeyT a, KeyT b) { + bool overlaps(KeyT a, KeyT b) const { assert(Traits::nonEmpty(a, b)); const_iterator I = find(a); if (!I.valid()) diff --git a/llvm/include/llvm/ADT/MapVector.h b/llvm/include/llvm/ADT/MapVector.h index 1de1124f4ea2..f9540999381a 100644 --- a/llvm/include/llvm/ADT/MapVector.h +++ b/llvm/include/llvm/ADT/MapVector.h @@ -43,6 +43,7 @@ class MapVector { "The mapped_type of the specified Map must be an integral type"); public: + using key_type = KeyT; using value_type = typename VectorType::value_type; using size_type = typename VectorType::size_type; diff --git a/llvm/include/llvm/ADT/PointerIntPair.h b/llvm/include/llvm/ADT/PointerIntPair.h index cb8b202c48b7..393ace6b70fc 100644 --- a/llvm/include/llvm/ADT/PointerIntPair.h +++ b/llvm/include/llvm/ADT/PointerIntPair.h @@ -22,7 +22,7 @@ namespace llvm { -template struct DenseMapInfo; +template struct DenseMapInfo; template struct PointerIntPairInfo; @@ -192,7 +192,7 @@ struct PointerIntPairInfo { // Provide specialization of DenseMapInfo for PointerIntPair. template -struct DenseMapInfo> { +struct DenseMapInfo, void> { using Ty = PointerIntPair; static Ty getEmptyKey() { diff --git a/llvm/include/llvm/ADT/PointerUnion.h b/llvm/include/llvm/ADT/PointerUnion.h index c39691061b72..0874f67db3fe 100644 --- a/llvm/include/llvm/ADT/PointerUnion.h +++ b/llvm/include/llvm/ADT/PointerUnion.h @@ -17,42 +17,13 @@ #include "llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/PointerIntPair.h" #include "llvm/Support/PointerLikeTypeTraits.h" +#include #include #include #include namespace llvm { -template struct PointerUnionTypeSelectorReturn { - using Return = T; -}; - -/// Get a type based on whether two types are the same or not. -/// -/// For: -/// -/// \code -/// using Ret = typename PointerUnionTypeSelector::Return; -/// \endcode -/// -/// Ret will be EQ type if T1 is same as T2 or NE type otherwise. -template -struct PointerUnionTypeSelector { - using Return = typename PointerUnionTypeSelectorReturn::Return; -}; - -template -struct PointerUnionTypeSelector { - using Return = typename PointerUnionTypeSelectorReturn::Return; -}; - -template -struct PointerUnionTypeSelectorReturn< - PointerUnionTypeSelector> { - using Return = - typename PointerUnionTypeSelector::Return; -}; - namespace pointer_union_detail { /// Determine the number of bits required to store integers with values < n. /// This is ceil(log2(n)). diff --git a/llvm/include/llvm/ADT/STLExtras.h b/llvm/include/llvm/ADT/STLExtras.h index eb001346b609..48f15b02283a 100644 --- a/llvm/include/llvm/ADT/STLExtras.h +++ b/llvm/include/llvm/ADT/STLExtras.h @@ -272,20 +272,24 @@ template auto drop_begin(T &&RangeOrContainer, size_t N = 1) { // be applied whenever operator* is invoked on the iterator. 
template ()(*std::declval()))> + typename ReferenceTy = + decltype(std::declval()(*std::declval()))> class mapped_iterator : public iterator_adaptor_base< - mapped_iterator, ItTy, - typename std::iterator_traits::iterator_category, - typename std::remove_reference::type> { + mapped_iterator, ItTy, + typename std::iterator_traits::iterator_category, + std::remove_reference_t, + typename std::iterator_traits::difference_type, + std::remove_reference_t *, ReferenceTy> { public: mapped_iterator(ItTy U, FuncTy F) : mapped_iterator::iterator_adaptor_base(std::move(U)), F(std::move(F)) {} ItTy getCurrent() { return this->I; } - FuncReturnTy operator*() const { return F(*this->I); } + const FuncTy &getFunction() const { return F; } + + ReferenceTy operator*() const { return F(*this->I); } private: FuncTy F; @@ -303,6 +307,32 @@ auto map_range(ContainerTy &&C, FuncTy F) { return make_range(map_iterator(C.begin(), F), map_iterator(C.end(), F)); } +/// A base type of mapped iterator, that is useful for building derived +/// iterators that do not need/want to store the map function (as in +/// mapped_iterator). These iterators must simply provide a `mapElement` method +/// that defines how to map a value of the iterator to the provided reference +/// type. +template +class mapped_iterator_base + : public iterator_adaptor_base< + DerivedT, ItTy, + typename std::iterator_traits::iterator_category, + std::remove_reference_t, + typename std::iterator_traits::difference_type, + std::remove_reference_t *, ReferenceTy> { +public: + using BaseT = mapped_iterator_base; + + mapped_iterator_base(ItTy U) + : mapped_iterator_base::iterator_adaptor_base(std::move(U)) {} + + ItTy getCurrent() { return this->I; } + + ReferenceTy operator*() const { + return static_cast(*this).mapElement(*this->I); + } +}; + /// Helper to determine if type T has a member called rbegin(). template class has_rbegin_impl { using yes = char[1]; @@ -371,12 +401,7 @@ class filter_iterator_base typename std::common_type< IterTag, typename std::iterator_traits< WrappedIteratorT>::iterator_category>::type> { - using BaseT = iterator_adaptor_base< - filter_iterator_base, - WrappedIteratorT, - typename std::common_type< - IterTag, typename std::iterator_traits< - WrappedIteratorT>::iterator_category>::type>; + using BaseT = typename filter_iterator_base::iterator_adaptor_base; protected: WrappedIteratorT End; @@ -411,12 +436,10 @@ template class filter_iterator_impl : public filter_iterator_base { - using BaseT = filter_iterator_base; - public: filter_iterator_impl(WrappedIteratorT Begin, WrappedIteratorT End, PredicateT Pred) - : BaseT(Begin, End, Pred) {} + : filter_iterator_impl::filter_iterator_base(Begin, End, Pred) {} }; /// Specialization of filter_iterator_base for bidirectional iteration. 
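// ----------------------------------------------------------------------------
// [Editor's note: illustrative sketch, not part of the imported patch.]
// A hypothetical iterator derived from the new mapped_iterator_base above: it
// yields the first member of each pair and needs no stored callable, only a
// mapElement() hook.
// ----------------------------------------------------------------------------
#include "llvm/ADT/STLExtras.h"
#include <utility>
#include <vector>

struct FirstIter
    : llvm::mapped_iterator_base<FirstIter,
                                 std::vector<std::pair<int, int>>::const_iterator,
                                 int> {
  FirstIter(std::vector<std::pair<int, int>>::const_iterator It) : BaseT(It) {}

  // The single customization point a derived iterator must provide.
  int mapElement(const std::pair<int, int> &P) const { return P.first; }
};

void firstIterSketch() {
  std::vector<std::pair<int, int>> V = {{1, 10}, {2, 20}};
  int Sum = 0;
  for (FirstIter It(V.begin()), End(V.end()); It != End; ++It)
    Sum += *It; // 1 + 2
  (void)Sum;
}
// ----------------------------------------------------------------------------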
@@ -425,8 +448,8 @@ class filter_iterator_impl : public filter_iterator_base { - using BaseT = filter_iterator_base; + using BaseT = typename filter_iterator_impl::filter_iterator_base; + void findPrevValid() { while (!this->Pred(*this->I)) BaseT::operator--(); @@ -514,9 +537,7 @@ template class early_inc_iterator_impl : public iterator_adaptor_base, WrappedIteratorT, std::input_iterator_tag> { - using BaseT = - iterator_adaptor_base, - WrappedIteratorT, std::input_iterator_tag>; + using BaseT = typename early_inc_iterator_impl::iterator_adaptor_base; using PointerT = typename std::iterator_traits::pointer; @@ -630,12 +651,18 @@ protected: return std::tuple(std::prev(std::get(iterators))...); } + template + bool test_all_equals(const zip_common &other, + std::index_sequence) const { + return all_of(std::initializer_list{std::get(this->iterators) == + std::get(other.iterators)...}, + identity{}); + } + public: zip_common(Iters &&... ts) : iterators(std::forward(ts)...) {} - value_type operator*() { return deref(std::index_sequence_for{}); } - - const value_type operator*() const { + value_type operator*() const { return deref(std::index_sequence_for{}); } @@ -650,6 +677,11 @@ public: iterators = tup_dec(std::index_sequence_for{}); return *reinterpret_cast(this); } + + /// Return true if all the iterator are matching `other`'s iterators. + bool all_equals(zip_common &other) { + return test_all_equals(other, std::index_sequence_for{}); + } }; template @@ -801,8 +833,6 @@ public: : iterators(std::forward(ts.first)...), end_iterators(std::forward(ts.second)...) {} - value_type operator*() { return deref(std::index_sequence_for{}); } - value_type operator*() const { return deref(std::index_sequence_for{}); } @@ -1073,8 +1103,7 @@ template class indexed_accessor_range_base { public: - using RangeBaseT = - indexed_accessor_range_base; + using RangeBaseT = indexed_accessor_range_base; /// An iterator element of this range. class iterator : public indexed_accessor_iterator( - owner, curIndex) {} + : iterator::indexed_accessor_iterator(owner, curIndex) {} /// Allow access to the constructor. friend indexed_accessor_range_base class first_or_second_type { +public: + using type = + typename std::conditional_t::value, FirstTy, + std::remove_reference_t>; +}; +} // end namespace detail + /// Given a container of pairs, return a range over the first elements. template auto make_first_range(ContainerTy &&c) { - return llvm::map_range( - std::forward(c), - [](decltype((*std::begin(c))) elt) -> decltype((elt.first)) { - return elt.first; - }); + using EltTy = decltype((*std::begin(c))); + return llvm::map_range(std::forward(c), + [](EltTy elt) -> typename detail::first_or_second_type< + EltTy, decltype((elt.first))>::type { + return elt.first; + }); } /// Given a container of pairs, return a range over the second elements. template auto make_second_range(ContainerTy &&c) { + using EltTy = decltype((*std::begin(c))); return llvm::map_range( std::forward(c), - [](decltype((*std::begin(c))) elt) -> decltype((elt.second)) { + [](EltTy elt) -> + typename detail::first_or_second_type::type { return elt.second; }); } @@ -1260,7 +1307,7 @@ template auto make_second_range(ContainerTy &&c) { /// compares less than the first component of another std::pair. 
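// ----------------------------------------------------------------------------
// [Editor's note: illustrative sketch, not part of the imported patch.]
// make_first_range()/make_second_range(), adjusted above, iterate over the
// .first and .second members of a range of pairs; the data here is made up.
// ----------------------------------------------------------------------------
#include "llvm/ADT/STLExtras.h"
#include <utility>
#include <vector>

void firstSecondRangeSketch() {
  std::vector<std::pair<int, const char *>> Pairs = {{1, "one"}, {2, "two"}};
  int KeySum = 0;
  for (int Key : llvm::make_first_range(Pairs))
    KeySum += Key; // 1 + 2
  for (const char *Name : llvm::make_second_range(Pairs))
    (void)Name; // "one", then "two"
  (void)KeySum;
}
// ----------------------------------------------------------------------------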
struct less_first { template bool operator()(const T &lhs, const T &rhs) const { - return lhs.first < rhs.first; + return std::less<>()(lhs.first, rhs.first); } }; @@ -1268,7 +1315,7 @@ struct less_first { /// compares less than the second component of another std::pair. struct less_second { template bool operator()(const T &lhs, const T &rhs) const { - return lhs.second < rhs.second; + return std::less<>()(lhs.second, rhs.second); } }; @@ -1877,8 +1924,7 @@ template struct result_pair { } std::size_t index() const { return Index; } - const value_reference value() const { return *Iter; } - value_reference value() { return *Iter; } + value_reference value() const { return *Iter; } private: std::size_t Index = std::numeric_limits::max(); @@ -1887,11 +1933,8 @@ private: template class enumerator_iter - : public iterator_facade_base< - enumerator_iter, std::forward_iterator_tag, result_pair, - typename std::iterator_traits>::difference_type, - typename std::iterator_traits>::pointer, - typename std::iterator_traits>::reference> { + : public iterator_facade_base, std::forward_iterator_tag, + const result_pair> { using result_type = result_pair; public: @@ -1901,7 +1944,6 @@ public: enumerator_iter(std::size_t Index, IterOfRange Iter) : Result(Index, Iter) {} - result_type &operator*() { return Result; } const result_type &operator*() const { return Result; } enumerator_iter &operator++() { @@ -1986,6 +2028,45 @@ decltype(auto) apply_tuple(F &&f, Tuple &&t) { Indices{}); } +namespace detail { + +template +bool all_of_zip_predicate_first(Predicate &&P, Args &&...args) { + auto z = zip(args...); + auto it = z.begin(); + auto end = z.end(); + while (it != end) { + if (!apply_tuple([&](auto &&...args) { return P(args...); }, *it)) + return false; + ++it; + } + return it.all_equals(end); +} + +// Just an adaptor to switch the order of argument and have the predicate before +// the zipped inputs. +template +bool all_of_zip_predicate_last( + std::tuple argsThenPredicate, + std::index_sequence) { + auto constexpr OutputIndex = + std::tuple_size::value - 1; + return all_of_zip_predicate_first(std::get(argsThenPredicate), + std::get(argsThenPredicate)...); +} + +} // end namespace detail + +/// Compare two zipped ranges using the provided predicate (as last argument). +/// Return true if all elements satisfy the predicate and false otherwise. +// Return false if the zipped iterator aren't all at end (size mismatch). +template +bool all_of_zip(ArgsAndPredicate &&...argsAndPredicate) { + return detail::all_of_zip_predicate_last( + std::forward_as_tuple(argsAndPredicate...), + std::make_index_sequence{}); +} + /// Return true if the sequence [Begin, End) has exactly N items. Runs in O(N) /// time. Not meant for use with random-access iterators. /// Can optionally take a predicate to filter lazily some items. diff --git a/llvm/include/llvm/ADT/Sequence.h b/llvm/include/llvm/ADT/Sequence.h index 3e4bf0932222..fdbf397984d0 100644 --- a/llvm/include/llvm/ADT/Sequence.h +++ b/llvm/include/llvm/ADT/Sequence.h @@ -6,9 +6,74 @@ // //===----------------------------------------------------------------------===// /// \file -/// This routine provides some synthesis utilities to produce sequences of -/// values. The names are intentionally kept very short as they tend to occur -/// in common and widely used contexts. +/// Provides some synthesis utilities to produce sequences of values. The names +/// are intentionally kept very short as they tend to occur in common and +/// widely used contexts. 
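A minimal usage sketch for the new `all_of_zip` helper (the ranges and the predicate are made up); note that it also returns false when the ranges differ in length, because the zipped iterators then do not all reach their end together:

```
#include "llvm/ADT/STLExtras.h"
#include <vector>

// True only if A and B have the same length and A[i] <= B[i] for every i.
bool pairwiseLE(const std::vector<int> &A, const std::vector<int> &B) {
  return llvm::all_of_zip(A, B, [](int X, int Y) { return X <= Y; });
}
```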
+/// +/// The `seq(A, B)` function produces a sequence of values from `A` to up to +/// (but not including) `B`, i.e., [`A`, `B`), that can be safely iterated over. +/// `seq` supports both integral (e.g., `int`, `char`, `uint32_t`) and enum +/// types. `seq_inclusive(A, B)` produces a sequence of values from `A` to `B`, +/// including `B`. +/// +/// Examples with integral types: +/// ``` +/// for (int x : seq(0, 3)) +/// outs() << x << " "; +/// ``` +/// +/// Prints: `0 1 2 `. +/// +/// ``` +/// for (int x : seq_inclusive(0, 3)) +/// outs() << x << " "; +/// ``` +/// +/// Prints: `0 1 2 3 `. +/// +/// Similar to `seq` and `seq_inclusive`, the `enum_seq` and +/// `enum_seq_inclusive` functions produce sequences of enum values that can be +/// iterated over. +/// To enable iteration with enum types, you need to either mark enums as safe +/// to iterate on by specializing `enum_iteration_traits`, or opt into +/// potentially unsafe iteration at every callsite by passing +/// `force_iteration_on_noniterable_enum`. +/// +/// Examples with enum types: +/// ``` +/// namespace X { +/// enum class MyEnum : unsigned {A = 0, B, C}; +/// } // namespace X +/// +/// template <> struct enum_iteration_traits { +/// static contexpr bool is_iterable = true; +/// }; +/// +/// class MyClass { +/// public: +/// enum Safe { D = 3, E, F }; +/// enum MaybeUnsafe { G = 1, H = 2, I = 4 }; +/// }; +/// +/// template <> struct enum_iteration_traits { +/// static contexpr bool is_iterable = true; +/// }; +/// ``` +/// +/// ``` +/// for (auto v : enum_seq(MyClass::Safe::D, MyClass::Safe::F)) +/// outs() << int(v) << " "; +/// ``` +/// +/// Prints: `3 4 `. +/// +/// ``` +/// for (auto v : enum_seq(MyClass::MaybeUnsafe::H, MyClass::MaybeUnsafe::I, +/// force_iteration_on_noniterable_enum)) +/// outs() << int(v) << " "; +/// ``` +/// +/// Prints: `2 3 `. /// //===----------------------------------------------------------------------===// @@ -18,12 +83,31 @@ #include // assert #include // std::random_access_iterator_tag #include // std::numeric_limits -#include // std::underlying_type, std::is_enum +#include // std::is_integral, std::is_enum, std::underlying_type, + // std::enable_if #include "llvm/Support/MathExtras.h" // AddOverflow / SubOverflow namespace llvm { +// Enum traits that marks enums as safe or unsafe to iterate over. +// By default, enum types are *not* considered safe for iteration. +// To allow iteration for your enum type, provide a specialization with +// `is_iterable` set to `true` in the `llvm` namespace. +// Alternatively, you can pass the `force_iteration_on_noniterable_enum` tag +// to `enum_seq` or `enum_seq_inclusive`. +template struct enum_iteration_traits { + static constexpr bool is_iterable = false; +}; + +struct force_iteration_on_noniterable_enum_t { + explicit force_iteration_on_noniterable_enum_t() = default; +}; + +// TODO: Make this `inline` once we update to C++17 to avoid ORD violations. +constexpr force_iteration_on_noniterable_enum_t + force_iteration_on_noniterable_enum; + namespace detail { // Returns whether a value of type U can be represented with type T. @@ -213,27 +297,81 @@ private: iterator PastEndValue; }; -/// Iterate over an integral/enum type from Begin up to - but not including - -/// End. -/// Note on enum iteration: `seq` will generate each consecutive value, even if -/// no enumerator with that value exists. +/// Iterate over an integral type from Begin up to - but not including - End. 
/// Note: Begin and End values have to be within [INTMAX_MIN, INTMAX_MAX] for /// forward iteration (resp. [INTMAX_MIN + 1, INTMAX_MAX] for reverse /// iteration). -template auto seq(T Begin, T End) { +template ::value && + !std::is_enum::value>> +auto seq(T Begin, T End) { return iota_range(Begin, End, false); } -/// Iterate over an integral/enum type from Begin to End inclusive. -/// Note on enum iteration: `seq_inclusive` will generate each consecutive -/// value, even if no enumerator with that value exists. +/// Iterate over an integral type from Begin to End inclusive. /// Note: Begin and End values have to be within [INTMAX_MIN, INTMAX_MAX - 1] /// for forward iteration (resp. [INTMAX_MIN + 1, INTMAX_MAX - 1] for reverse /// iteration). -template auto seq_inclusive(T Begin, T End) { +template ::value && + !std::is_enum::value>> +auto seq_inclusive(T Begin, T End) { return iota_range(Begin, End, true); } +/// Iterate over an enum type from Begin up to - but not including - End. +/// Note: `enum_seq` will generate each consecutive value, even if no +/// enumerator with that value exists. +/// Note: Begin and End values have to be within [INTMAX_MIN, INTMAX_MAX] for +/// forward iteration (resp. [INTMAX_MIN + 1, INTMAX_MAX] for reverse +/// iteration). +template ::value>> +auto enum_seq(EnumT Begin, EnumT End) { + static_assert(enum_iteration_traits::is_iterable, + "Enum type is not marked as iterable."); + return iota_range(Begin, End, false); +} + +/// Iterate over an enum type from Begin up to - but not including - End, even +/// when `EnumT` is not marked as safely iterable by `enum_iteration_traits`. +/// Note: `enum_seq` will generate each consecutive value, even if no +/// enumerator with that value exists. +/// Note: Begin and End values have to be within [INTMAX_MIN, INTMAX_MAX] for +/// forward iteration (resp. [INTMAX_MIN + 1, INTMAX_MAX] for reverse +/// iteration). +template ::value>> +auto enum_seq(EnumT Begin, EnumT End, force_iteration_on_noniterable_enum_t) { + return iota_range(Begin, End, false); +} + +/// Iterate over an enum type from Begin to End inclusive. +/// Note: `enum_seq_inclusive` will generate each consecutive value, even if no +/// enumerator with that value exists. +/// Note: Begin and End values have to be within [INTMAX_MIN, INTMAX_MAX - 1] +/// for forward iteration (resp. [INTMAX_MIN + 1, INTMAX_MAX - 1] for reverse +/// iteration). +template ::value>> +auto enum_seq_inclusive(EnumT Begin, EnumT End) { + static_assert(enum_iteration_traits::is_iterable, + "Enum type is not marked as iterable."); + return iota_range(Begin, End, true); +} + +/// Iterate over an enum type from Begin to End inclusive, even when `EnumT` +/// is not marked as safely iterable by `enum_iteration_traits`. +/// Note: `enum_seq_inclusive` will generate each consecutive value, even if no +/// enumerator with that value exists. +/// Note: Begin and End values have to be within [INTMAX_MIN, INTMAX_MAX - 1] +/// for forward iteration (resp. [INTMAX_MIN + 1, INTMAX_MAX - 1] for reverse +/// iteration). 
+template ::value>> +auto enum_seq_inclusive(EnumT Begin, EnumT End, + force_iteration_on_noniterable_enum_t) { + return iota_range(Begin, End, true); +} + } // end namespace llvm #endif // LLVM_ADT_SEQUENCE_H diff --git a/llvm/include/llvm/ADT/SetOperations.h b/llvm/include/llvm/ADT/SetOperations.h index 62f1d26dc1c2..3e30b6bb83d3 100644 --- a/llvm/include/llvm/ADT/SetOperations.h +++ b/llvm/include/llvm/ADT/SetOperations.h @@ -77,15 +77,6 @@ bool set_is_subset(const S1Ty &S1, const S2Ty &S2) { return true; } -/// set_is_strict_subset(A, B) - Return true iff A in B and and A != B -/// -template -bool set_is_strict_subset(const S1Ty &S1, const S2Ty &S2) { - if (S1.size() >= S2.size()) - return false; - return set_is_subset(S1, S2); -} - } // End llvm namespace #endif diff --git a/llvm/include/llvm/ADT/SmallBitVector.h b/llvm/include/llvm/ADT/SmallBitVector.h index f570bac23ad5..51ee5dbbce05 100644 --- a/llvm/include/llvm/ADT/SmallBitVector.h +++ b/llvm/include/llvm/ADT/SmallBitVector.h @@ -60,7 +60,7 @@ class SmallBitVector { "Unsupported word size"); public: - using size_type = unsigned; + using size_type = uintptr_t; // Encapsulation of a single bit. class reference { @@ -96,7 +96,7 @@ private: return reinterpret_cast(X); } - void switchToSmall(uintptr_t NewSmallBits, size_t NewSize) { + void switchToSmall(uintptr_t NewSmallBits, size_type NewSize) { X = 1; setSmallSize(NewSize); setSmallBits(NewSmallBits); @@ -120,9 +120,11 @@ private: } // Return the size. - size_t getSmallSize() const { return getSmallRawBits() >> SmallNumDataBits; } + size_type getSmallSize() const { + return getSmallRawBits() >> SmallNumDataBits; + } - void setSmallSize(size_t Size) { + void setSmallSize(size_type Size) { setSmallRawBits(getSmallBits() | (Size << SmallNumDataBits)); } @@ -189,7 +191,7 @@ public: } /// Returns the number of bits in this bitvector. - size_t size() const { + size_type size() const { return isSmall() ? 
getSmallSize() : getPointer()->size(); } @@ -336,8 +338,8 @@ public: } else { BitVector *BV = new BitVector(N, t); uintptr_t OldBits = getSmallBits(); - for (size_t i = 0, e = getSmallSize(); i != e; ++i) - (*BV)[i] = (OldBits >> i) & 1; + for (size_type I = 0, E = getSmallSize(); I != E; ++I) + (*BV)[I] = (OldBits >> I) & 1; switchToLarge(BV); } } @@ -346,11 +348,11 @@ public: if (isSmall()) { if (N > SmallNumDataBits) { uintptr_t OldBits = getSmallRawBits(); - size_t SmallSize = getSmallSize(); + size_type SmallSize = getSmallSize(); BitVector *BV = new BitVector(SmallSize); - for (size_t i = 0; i < SmallSize; ++i) - if ((OldBits >> i) & 1) - BV->set(i); + for (size_type I = 0; I < SmallSize; ++I) + if ((OldBits >> I) & 1) + BV->set(I); BV->reserve(N); switchToLarge(BV); } @@ -491,8 +493,8 @@ public: else if (!isSmall() && !RHS.isSmall()) return *getPointer() == *RHS.getPointer(); else { - for (size_t i = 0, e = size(); i != e; ++i) { - if ((*this)[i] != RHS[i]) + for (size_type I = 0, E = size(); I != E; ++I) { + if ((*this)[I] != RHS[I]) return false; } return true; @@ -512,11 +514,11 @@ public: else if (!isSmall() && !RHS.isSmall()) getPointer()->operator&=(*RHS.getPointer()); else { - size_t i, e; - for (i = 0, e = std::min(size(), RHS.size()); i != e; ++i) - (*this)[i] = test(i) && RHS.test(i); - for (e = size(); i != e; ++i) - reset(i); + size_type I, E; + for (I = 0, E = std::min(size(), RHS.size()); I != E; ++I) + (*this)[I] = test(I) && RHS.test(I); + for (E = size(); I != E; ++I) + reset(I); } return *this; } @@ -561,8 +563,8 @@ public: else if (!isSmall() && !RHS.isSmall()) getPointer()->operator|=(*RHS.getPointer()); else { - for (size_t i = 0, e = RHS.size(); i != e; ++i) - (*this)[i] = test(i) || RHS.test(i); + for (size_type I = 0, E = RHS.size(); I != E; ++I) + (*this)[I] = test(I) || RHS.test(I); } return *this; } @@ -574,8 +576,8 @@ public: else if (!isSmall() && !RHS.isSmall()) getPointer()->operator^=(*RHS.getPointer()); else { - for (size_t i = 0, e = RHS.size(); i != e; ++i) - (*this)[i] = test(i) != RHS.test(i); + for (size_type I = 0, E = RHS.size(); I != E; ++I) + (*this)[I] = test(I) != RHS.test(I); } return *this; } @@ -721,8 +723,9 @@ template <> struct DenseMapInfo { } static unsigned getHashValue(const SmallBitVector &V) { uintptr_t Store; - return DenseMapInfo>>::getHashValue( - std::make_pair(V.size(), V.getData(Store))); + return DenseMapInfo< + std::pair>>:: + getHashValue(std::make_pair(V.size(), V.getData(Store))); } static bool isEqual(const SmallBitVector &LHS, const SmallBitVector &RHS) { if (LHS.isInvalid() || RHS.isInvalid()) diff --git a/llvm/include/llvm/ADT/SmallVector.h b/llvm/include/llvm/ADT/SmallVector.h index b8a11030fc33..0d13524f25ce 100644 --- a/llvm/include/llvm/ADT/SmallVector.h +++ b/llvm/include/llvm/ADT/SmallVector.h @@ -1239,13 +1239,22 @@ inline size_t capacity_in_bytes(const SmallVector &X) { return X.capacity_in_bytes(); } +template +using ValueTypeFromRangeType = + typename std::remove_const()))>::type>::type; + /// Given a range of type R, iterate the entire range and return a /// SmallVector with elements of the vector. This is useful, for example, /// when you want to iterate a range and then sort the results. 
template -SmallVector()))>::type>::type, - Size> +SmallVector, Size> to_vector(R &&Range) { + return {std::begin(Range), std::end(Range)}; +} +template +SmallVector, + CalculateSmallVectorDefaultInlinedElements< + ValueTypeFromRangeType>::value> to_vector(R &&Range) { return {std::begin(Range), std::end(Range)}; } diff --git a/llvm/include/llvm/ADT/StringExtras.h b/llvm/include/llvm/ADT/StringExtras.h index 6bda25b85313..2ca672e7855b 100644 --- a/llvm/include/llvm/ADT/StringExtras.h +++ b/llvm/include/llvm/ADT/StringExtras.h @@ -67,22 +67,27 @@ inline ArrayRef arrayRefFromStringRef(StringRef Input) { /// /// If \p C is not a valid hex digit, -1U is returned. inline unsigned hexDigitValue(char C) { - struct HexTable { - unsigned LUT[255] = {}; - constexpr HexTable() { - // Default initialize everything to invalid. - for (int i = 0; i < 255; ++i) - LUT[i] = ~0U; - // Initialize `0`-`9`. - for (int i = 0; i < 10; ++i) - LUT['0' + i] = i; - // Initialize `A`-`F` and `a`-`f`. - for (int i = 0; i < 6; ++i) - LUT['A' + i] = LUT['a' + i] = 10 + i; - } + /* clang-format off */ + static const int16_t LUT[256] = { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, // '0'..'9' + -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 'A'..'F' + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 'a'..'f' + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, }; - constexpr HexTable Table; - return Table.LUT[static_cast(C)]; + /* clang-format on */ + return LUT[static_cast(C)]; } /// Checks if character \p C is one of the 10 decimal digits. @@ -210,24 +215,31 @@ inline bool tryGetFromHex(StringRef Input, std::string &Output) { if (Input.empty()) return true; - Output.reserve((Input.size() + 1) / 2); + // If the input string is not properly aligned on 2 nibbles we pad out the + // front with a 0 prefix; e.g. `ABC` -> `0ABC`. + Output.resize((Input.size() + 1) / 2); + char *OutputPtr = const_cast(Output.data()); if (Input.size() % 2 == 1) { uint8_t Hex = 0; if (!tryGetHexFromNibbles('0', Input.front(), Hex)) return false; - - Output.push_back(Hex); + *OutputPtr++ = Hex; Input = Input.drop_front(); } - assert(Input.size() % 2 == 0); - while (!Input.empty()) { + // Convert the nibble pairs (e.g. `9C`) into bytes (0x9C). + // With the padding above we know the input is aligned and the output expects + // exactly half as many bytes as nibbles in the input. 
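A short sketch of the two `to_vector` overloads (the element type is illustrative): the pre-existing form takes an explicit inline element count, while the new overload lets `SmallVector` pick its default inline capacity for the deduced element type.

```
#include "llvm/ADT/SmallVector.h"
#include <vector>

void example(const std::vector<int> &V) {
  // Explicit inline capacity, as before.
  llvm::SmallVector<int, 8> A = llvm::to_vector<8>(V);
  // New overload: the default inline capacity is computed from the
  // deduced element type, matching SmallVector<int>.
  llvm::SmallVector<int> B = llvm::to_vector(V);
}
```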
+ size_t InputSize = Input.size(); + assert(InputSize % 2 == 0); + const char *InputPtr = Input.data(); + for (size_t OutputIndex = 0; OutputIndex < InputSize / 2; ++OutputIndex) { uint8_t Hex = 0; - if (!tryGetHexFromNibbles(Input[0], Input[1], Hex)) + if (!tryGetHexFromNibbles(InputPtr[OutputIndex * 2 + 0], // MSB + InputPtr[OutputIndex * 2 + 1], // LSB + Hex)) return false; - - Output.push_back(Hex); - Input = Input.drop_front(2); + OutputPtr[OutputIndex] = Hex; } return true; } @@ -501,6 +513,83 @@ public: } }; +/// A forward iterator over partitions of string over a separator. +class SplittingIterator + : public iterator_facade_base { + char SeparatorStorage; + StringRef Current; + StringRef Next; + StringRef Separator; + +public: + SplittingIterator(StringRef Str, StringRef Separator) + : Next(Str), Separator(Separator) { + ++*this; + } + + SplittingIterator(StringRef Str, char Separator) + : SeparatorStorage(Separator), Next(Str), + Separator(&SeparatorStorage, 1) { + ++*this; + } + + SplittingIterator(const SplittingIterator &R) + : SeparatorStorage(R.SeparatorStorage), Current(R.Current), Next(R.Next), + Separator(R.Separator) { + if (R.Separator.data() == &R.SeparatorStorage) + Separator = StringRef(&SeparatorStorage, 1); + } + + SplittingIterator &operator=(const SplittingIterator &R) { + if (this == &R) + return *this; + + SeparatorStorage = R.SeparatorStorage; + Current = R.Current; + Next = R.Next; + Separator = R.Separator; + if (R.Separator.data() == &R.SeparatorStorage) + Separator = StringRef(&SeparatorStorage, 1); + return *this; + } + + bool operator==(const SplittingIterator &R) const { + assert(Separator == R.Separator); + return Current.data() == R.Current.data(); + } + + const StringRef &operator*() const { return Current; } + + StringRef &operator*() { return Current; } + + SplittingIterator &operator++() { + std::tie(Current, Next) = Next.split(Separator); + return *this; + } +}; + +/// Split the specified string over a separator and return a range-compatible +/// iterable over its partitions. Used to permit conveniently iterating +/// over separated strings like so: +/// +/// \code +/// for (StringRef x : llvm::split("foo,bar,baz", ",")) +/// ...; +/// \end +/// +/// Note that the passed string must remain valid throuhgout lifetime +/// of the iterators. +inline iterator_range split(StringRef Str, StringRef Separator) { + return {SplittingIterator(Str, Separator), + SplittingIterator(StringRef(), Separator)}; +} + +inline iterator_range split(StringRef Str, char Separator) { + return {SplittingIterator(Str, Separator), + SplittingIterator(StringRef(), Separator)}; +} + } // end namespace llvm #endif // LLVM_ADT_STRINGEXTRAS_H diff --git a/llvm/include/llvm/ADT/StringMap.h b/llvm/include/llvm/ADT/StringMap.h index a82afc9a817c..669956d41e0c 100644 --- a/llvm/include/llvm/ADT/StringMap.h +++ b/llvm/include/llvm/ADT/StringMap.h @@ -126,9 +126,7 @@ public: StringMap(std::initializer_list> List) : StringMapImpl(List.size(), static_cast(sizeof(MapEntryTy))) { - for (const auto &P : List) { - insert(P); - } + insert(List); } StringMap(StringMap &&RHS) @@ -297,6 +295,21 @@ public: return try_emplace(KV.first, std::move(KV.second)); } + /// Inserts elements from range [first, last). If multiple elements in the + /// range have keys that compare equivalent, it is unspecified which element + /// is inserted . 
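A small example of the odd-length padding described in the comments above, going through the public `tryGetFromHex` entry point (the input string is illustrative):

```
#include "llvm/ADT/StringExtras.h"
#include <string>

void example() {
  std::string Bytes;
  if (llvm::tryGetFromHex("ABC", Bytes)) {
    // "ABC" was decoded as if it were "0ABC":
    // Bytes.size() == 2, Bytes[0] == 0x0A, Bytes[1] == 0xBC.
  }
}
```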
+ template void insert(InputIt First, InputIt Last) { + for (InputIt It = First; It != Last; ++It) + insert(*It); + } + + /// Inserts elements from initializer list ilist. If multiple elements in + /// the range have keys that compare equivalent, it is unspecified which + /// element is inserted + void insert(std::initializer_list> List) { + insert(List.begin(), List.end()); + } + /// Inserts an element or assigns to the current element if the key already /// exists. The return type is the same as try_emplace. template @@ -465,13 +478,7 @@ public: explicit StringMapKeyIterator(StringMapConstIterator Iter) : base(std::move(Iter)) {} - StringRef &operator*() { - Key = this->wrapped()->getKey(); - return Key; - } - -private: - StringRef Key; + StringRef operator*() const { return this->wrapped()->getKey(); } }; } // end namespace llvm diff --git a/llvm/include/llvm/ADT/StringRef.h b/llvm/include/llvm/ADT/StringRef.h index 17e64f7f81bb..9f4b89218042 100644 --- a/llvm/include/llvm/ADT/StringRef.h +++ b/llvm/include/llvm/ADT/StringRef.h @@ -35,7 +35,6 @@ namespace llvm { class APInt; class hash_code; template class SmallVectorImpl; - template struct DenseMapInfo; class StringRef; /// Helper functions for StringRef::getAsInteger. @@ -949,7 +948,7 @@ namespace llvm { hash_code hash_value(StringRef S); // Provide DenseMapInfo for StringRefs. - template <> struct DenseMapInfo { + template <> struct DenseMapInfo { static inline StringRef getEmptyKey() { return StringRef( reinterpret_cast(~static_cast(0)), 0); diff --git a/llvm/include/llvm/ADT/Triple.h b/llvm/include/llvm/ADT/Triple.h index 76f3514050f0..2fd3047acbfd 100644 --- a/llvm/include/llvm/ADT/Triple.h +++ b/llvm/include/llvm/ADT/Triple.h @@ -93,6 +93,8 @@ public: hsail64, // AMD HSAIL with 64-bit pointers spir, // SPIR: standard portable IR for OpenCL 32-bit version spir64, // SPIR: standard portable IR for OpenCL 64-bit version + spirv32, // SPIR-V with 32-bit pointers + spirv64, // SPIR-V with 64-bit pointers kalimba, // Kalimba: generic kalimba shave, // SHAVE: Movidius vector VLIW processors lanai, // Lanai: Lanai 32-bit @@ -106,6 +108,9 @@ public: enum SubArchType { NoSubArch, + ARMSubArch_v9_2a, + ARMSubArch_v9_1a, + ARMSubArch_v9, ARMSubArch_v8_7a, ARMSubArch_v8_6a, ARMSubArch_v8_5a, @@ -290,10 +295,10 @@ public: /// @name Normalization /// @{ - /// normalize - Turn an arbitrary machine specification into the canonical - /// triple form (or something sensible that the Triple class understands if - /// nothing better can reasonably be done). In particular, it handles the - /// common case in which otherwise valid components are in the wrong order. + /// Turn an arbitrary machine specification into the canonical triple form (or + /// something sensible that the Triple class understands if nothing better can + /// reasonably be done). In particular, it handles the common case in which + /// otherwise valid components are in the wrong order. static std::string normalize(StringRef Str); /// Return the normalized form of this triple's string. @@ -303,25 +308,24 @@ public: /// @name Typed Component Access /// @{ - /// getArch - Get the parsed architecture type of this triple. + /// Get the parsed architecture type of this triple. ArchType getArch() const { return Arch; } - /// getSubArch - get the parsed subarchitecture type for this triple. + /// get the parsed subarchitecture type for this triple. SubArchType getSubArch() const { return SubArch; } - /// getVendor - Get the parsed vendor type of this triple. 
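A quick usage sketch for the new `StringMap::insert` range and initializer-list overloads (keys and values are made up):

```
#include "llvm/ADT/StringMap.h"
#include <utility>
#include <vector>

void example() {
  llvm::StringMap<int> M;
  // Initializer-list insert; this is what the initializer-list
  // constructor now forwards to.
  M.insert({{"one", 1}, {"two", 2}});
  // Range insert.
  std::vector<std::pair<llvm::StringRef, int>> More = {{"three", 3}};
  M.insert(More.begin(), More.end());
}
```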
+ /// Get the parsed vendor type of this triple. VendorType getVendor() const { return Vendor; } - /// getOS - Get the parsed operating system type of this triple. + /// Get the parsed operating system type of this triple. OSType getOS() const { return OS; } - /// hasEnvironment - Does this triple have the optional environment - /// (fourth) component? + /// Does this triple have the optional environment (fourth) component? bool hasEnvironment() const { return getEnvironmentName() != ""; } - /// getEnvironment - Get the parsed environment type of this triple. + /// Get the parsed environment type of this triple. EnvironmentType getEnvironment() const { return Environment; } /// Parse the version number from the OS name component of the @@ -333,39 +337,39 @@ public: void getEnvironmentVersion(unsigned &Major, unsigned &Minor, unsigned &Micro) const; - /// getFormat - Get the object format for this triple. + /// Get the object format for this triple. ObjectFormatType getObjectFormat() const { return ObjectFormat; } - /// getOSVersion - Parse the version number from the OS name component of the - /// triple, if present. + /// Parse the version number from the OS name component of the triple, if + /// present. /// /// For example, "fooos1.2.3" would return (1, 2, 3). /// /// If an entry is not defined, it will be returned as 0. void getOSVersion(unsigned &Major, unsigned &Minor, unsigned &Micro) const; - /// getOSMajorVersion - Return just the major version number, this is - /// specialized because it is a common query. + /// Return just the major version number, this is specialized because it is a + /// common query. unsigned getOSMajorVersion() const { unsigned Maj, Min, Micro; getOSVersion(Maj, Min, Micro); return Maj; } - /// getMacOSXVersion - Parse the version number as with getOSVersion and then - /// translate generic "darwin" versions to the corresponding OS X versions. - /// This may also be called with IOS triples but the OS X version number is - /// just set to a constant 10.4.0 in that case. Returns true if successful. + /// Parse the version number as with getOSVersion and then translate generic + /// "darwin" versions to the corresponding OS X versions. This may also be + /// called with IOS triples but the OS X version number is just set to a + /// constant 10.4.0 in that case. Returns true if successful. bool getMacOSXVersion(unsigned &Major, unsigned &Minor, unsigned &Micro) const; - /// getiOSVersion - Parse the version number as with getOSVersion. This should - /// only be called with IOS or generic triples. + /// Parse the version number as with getOSVersion. This should only be called + /// with IOS or generic triples. void getiOSVersion(unsigned &Major, unsigned &Minor, unsigned &Micro) const; - /// getWatchOSVersion - Parse the version number as with getOSVersion. This - /// should only be called with WatchOS or generic triples. + /// Parse the version number as with getOSVersion. This should only be called + /// with WatchOS or generic triples. void getWatchOSVersion(unsigned &Major, unsigned &Minor, unsigned &Micro) const; @@ -377,24 +381,24 @@ public: const std::string &getTriple() const { return Data; } - /// getArchName - Get the architecture (first) component of the - /// triple. + /// Get the architecture (first) component of the triple. StringRef getArchName() const; - /// getVendorName - Get the vendor (second) component of the triple. + /// Get the architecture name based on Kind and SubArch. 
+ StringRef getArchName(ArchType Kind, SubArchType SubArch = NoSubArch) const; + + /// Get the vendor (second) component of the triple. StringRef getVendorName() const; - /// getOSName - Get the operating system (third) component of the - /// triple. + /// Get the operating system (third) component of the triple. StringRef getOSName() const; - /// getEnvironmentName - Get the optional environment (fourth) - /// component of the triple, or "" if empty. + /// Get the optional environment (fourth) component of the triple, or "" if + /// empty. StringRef getEnvironmentName() const; - /// getOSAndEnvironmentName - Get the operating system and optional - /// environment components as a single string (separated by a '-' - /// if the environment component is present). + /// Get the operating system and optional environment components as a single + /// string (separated by a '-' if the environment component is present). StringRef getOSAndEnvironmentName() const; /// @} @@ -420,8 +424,8 @@ public: /// Note that this tests for 16-bit pointer width, and nothing else. bool isArch16Bit() const; - /// isOSVersionLT - Helper function for doing comparisons against version - /// numbers included in the target triple. + /// Helper function for doing comparisons against version numbers included in + /// the target triple. bool isOSVersionLT(unsigned Major, unsigned Minor = 0, unsigned Micro = 0) const { unsigned LHS[3]; @@ -443,14 +447,13 @@ public: return isOSVersionLT(RHS[0], RHS[1], RHS[2]); } - /// isMacOSXVersionLT - Comparison function for checking OS X version - /// compatibility, which handles supporting skewed version numbering schemes - /// used by the "darwin" triples. + /// Comparison function for checking OS X version compatibility, which handles + /// supporting skewed version numbering schemes used by the "darwin" triples. bool isMacOSXVersionLT(unsigned Major, unsigned Minor = 0, unsigned Micro = 0) const; - /// isMacOSX - Is this a Mac OS X triple. For legacy reasons, we support both - /// "darwin" and "osx" as OS X triples. + /// Is this a Mac OS X triple. For legacy reasons, we support both "darwin" + /// and "osx" as OS X triples. bool isMacOSX() const { return getOS() == Triple::Darwin || getOS() == Triple::MacOSX; } @@ -480,7 +483,7 @@ public: bool isOSzOS() const { return getOS() == Triple::ZOS; } - /// isOSDarwin - Is this a "Darwin" OS (macOS, iOS, tvOS or watchOS). + /// Is this a "Darwin" OS (macOS, iOS, tvOS or watchOS). bool isOSDarwin() const { return isMacOSX() || isiOS() || isWatchOS(); } @@ -698,6 +701,11 @@ public: return getArch() == Triple::spir || getArch() == Triple::spir64; } + /// Tests whether the target is SPIR-V (32/64-bit). + bool isSPIRV() const { + return getArch() == Triple::spirv32 || getArch() == Triple::spirv64; + } + /// Tests whether the target is NVPTX (32- or 64-bit). bool isNVPTX() const { return getArch() == Triple::nvptx || getArch() == Triple::nvptx64; @@ -720,6 +728,19 @@ public: return getArch() == Triple::arm || getArch() == Triple::armeb; } + /// Tests whether the target supports the EHABI exception + /// handling standard. 
+ bool isTargetEHABICompatible() const { + return (isARM() || isThumb()) && + (getEnvironment() == Triple::EABI || + getEnvironment() == Triple::GNUEABI || + getEnvironment() == Triple::MuslEABI || + getEnvironment() == Triple::EABIHF || + getEnvironment() == Triple::GNUEABIHF || + getEnvironment() == Triple::MuslEABIHF || isAndroid()) && + isOSBinFormatELF(); + } + /// Tests whether the target is AArch64 (little and big endian). bool isAArch64() const { return getArch() == Triple::aarch64 || getArch() == Triple::aarch64_be || @@ -833,46 +854,38 @@ public: /// @name Mutators /// @{ - /// setArch - Set the architecture (first) component of the triple - /// to a known type. - void setArch(ArchType Kind); + /// Set the architecture (first) component of the triple to a known type. + void setArch(ArchType Kind, SubArchType SubArch = NoSubArch); - /// setVendor - Set the vendor (second) component of the triple to a - /// known type. + /// Set the vendor (second) component of the triple to a known type. void setVendor(VendorType Kind); - /// setOS - Set the operating system (third) component of the triple - /// to a known type. + /// Set the operating system (third) component of the triple to a known type. void setOS(OSType Kind); - /// setEnvironment - Set the environment (fourth) component of the triple - /// to a known type. + /// Set the environment (fourth) component of the triple to a known type. void setEnvironment(EnvironmentType Kind); - /// setObjectFormat - Set the object file format + /// Set the object file format. void setObjectFormat(ObjectFormatType Kind); - /// setTriple - Set all components to the new triple \p Str. + /// Set all components to the new triple \p Str. void setTriple(const Twine &Str); - /// setArchName - Set the architecture (first) component of the - /// triple by name. + /// Set the architecture (first) component of the triple by name. void setArchName(StringRef Str); - /// setVendorName - Set the vendor (second) component of the triple - /// by name. + /// Set the vendor (second) component of the triple by name. void setVendorName(StringRef Str); - /// setOSName - Set the operating system (third) component of the - /// triple by name. + /// Set the operating system (third) component of the triple by name. void setOSName(StringRef Str); - /// setEnvironmentName - Set the optional environment (fourth) - /// component of the triple by name. + /// Set the optional environment (fourth) component of the triple by name. void setEnvironmentName(StringRef Str); - /// setOSAndEnvironmentName - Set the operating system and optional - /// environment components with a single string. + /// Set the operating system and optional environment components with a single + /// string. void setOSAndEnvironmentName(StringRef Str); /// @} @@ -938,33 +951,30 @@ public: /// @name Static helpers for IDs. /// @{ - /// getArchTypeName - Get the canonical name for the \p Kind architecture. + /// Get the canonical name for the \p Kind architecture. static StringRef getArchTypeName(ArchType Kind); - /// getArchTypePrefix - Get the "prefix" canonical name for the \p Kind - /// architecture. This is the prefix used by the architecture specific - /// builtins, and is suitable for passing to \see - /// Intrinsic::getIntrinsicForGCCBuiltin(). + /// Get the "prefix" canonical name for the \p Kind architecture. This is the + /// prefix used by the architecture specific builtins, and is suitable for + /// passing to \see Intrinsic::getIntrinsicForGCCBuiltin(). 
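An illustrative sketch tying together a few of the `Triple` additions above, namely `isTargetEHABICompatible` and the optional `SubArchType` parameter on `setArch` (the triple string itself is made up):

```
#include "llvm/ADT/Triple.h"

void example() {
  llvm::Triple T(llvm::Triple::normalize("armv7-linux-gnueabihf"));
  if (T.isARM() && T.isTargetEHABICompatible()) {
    // An EHABI-compatible ARM/ELF target; e.g. pick EHABI unwind tables.
  }
  // The mutator can now also set the sub-architecture in one call.
  T.setArch(llvm::Triple::aarch64, llvm::Triple::NoSubArch);
}
```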
/// /// \return - The architecture prefix, or 0 if none is defined. static StringRef getArchTypePrefix(ArchType Kind); - /// getVendorTypeName - Get the canonical name for the \p Kind vendor. + /// Get the canonical name for the \p Kind vendor. static StringRef getVendorTypeName(VendorType Kind); - /// getOSTypeName - Get the canonical name for the \p Kind operating system. + /// Get the canonical name for the \p Kind operating system. static StringRef getOSTypeName(OSType Kind); - /// getEnvironmentTypeName - Get the canonical name for the \p Kind - /// environment. + /// Get the canonical name for the \p Kind environment. static StringRef getEnvironmentTypeName(EnvironmentType Kind); /// @} /// @name Static helpers for converting alternate architecture names. /// @{ - /// getArchTypeForLLVMName - The canonical type for the given LLVM - /// architecture name (e.g., "x86"). + /// The canonical type for the given LLVM architecture name (e.g., "x86"). static ArchType getArchTypeForLLVMName(StringRef Str); /// @} diff --git a/llvm/include/llvm/ADT/TypeSwitch.h b/llvm/include/llvm/ADT/TypeSwitch.h index 815b9a40afaf..3b7598f3251d 100644 --- a/llvm/include/llvm/ADT/TypeSwitch.h +++ b/llvm/include/llvm/ADT/TypeSwitch.h @@ -35,7 +35,12 @@ public: /// Invoke a case on the derived class with multiple case types. template - DerivedT &Case(CallableT &&caseFn) { + // This is marked always_inline and nodebug so it doesn't show up in stack + // traces at -O0 (or other optimization levels). Large TypeSwitch's are + // common, are equivalent to a switch, and don't add any value to stack + // traces. + LLVM_ATTRIBUTE_ALWAYS_INLINE LLVM_ATTRIBUTE_NODEBUG DerivedT & + Case(CallableT &&caseFn) { DerivedT &derived = static_cast(*this); return derived.template Case(caseFn) .template Case(caseFn); diff --git a/llvm/include/llvm/ADT/iterator.h b/llvm/include/llvm/ADT/iterator.h index b3c6608e9b6e..6f0c42fe08be 100644 --- a/llvm/include/llvm/ADT/iterator.h +++ b/llvm/include/llvm/ADT/iterator.h @@ -35,6 +35,21 @@ namespace llvm { /// terms of addition of one. These aren't equivalent for all iterator /// categories, and respecting that adds a lot of complexity for little gain. /// +/// Iterators are expected to have const rules analogous to pointers, with a +/// single, const-qualified operator*() that returns ReferenceT. This matches +/// the second and third pointers in the following example: +/// \code +/// int Value; +/// { int *I = &Value; } // ReferenceT 'int&' +/// { int *const I = &Value; } // ReferenceT 'int&'; const +/// { const int *I = &Value; } // ReferenceT 'const int&' +/// { const int *const I = &Value; } // ReferenceT 'const int&'; const +/// \endcode +/// If an iterator facade returns a handle to its own state, then T (and +/// PointerT and ReferenceT) should usually be const-qualified. Otherwise, if +/// clients are expected to modify the handle itself, the field can be declared +/// mutable or use const_cast. +/// /// Classes wishing to use `iterator_facade_base` should implement the following /// methods: /// @@ -42,8 +57,7 @@ namespace llvm { /// (All of the following methods) /// - DerivedT &operator=(const DerivedT &R); /// - bool operator==(const DerivedT &R) const; -/// - const T &operator*() const; -/// - T &operator*(); +/// - T &operator*() const; /// - DerivedT &operator++(); /// /// Bidirectional Iterators: @@ -95,6 +109,22 @@ protected: operator ReferenceT() const { return *I; } }; + /// A proxy object for computing a pointer via indirecting a copy of a + /// reference. 
This is used in APIs which need to produce a pointer but for + /// which the reference might be a temporary. The proxy preserves the + /// reference internally and exposes the pointer via a arrow operator. + class PointerProxy { + friend iterator_facade_base; + + ReferenceT R; + + template + PointerProxy(RefT &&R) : R(std::forward(R)) {} + + public: + PointerT operator->() const { return &R; } + }; + public: DerivedT operator+(DifferenceTypeT n) const { static_assert(std::is_base_of::value, @@ -172,19 +202,13 @@ public: return !(static_cast(*this) < RHS); } - PointerT operator->() { return &static_cast(this)->operator*(); } - PointerT operator->() const { - return &static_cast(this)->operator*(); - } - ReferenceProxy operator[](DifferenceTypeT n) { - static_assert(IsRandomAccess, - "Subscripting is only defined for random access iterators."); - return ReferenceProxy(static_cast(this)->operator+(n)); + PointerProxy operator->() const { + return static_cast(this)->operator*(); } ReferenceProxy operator[](DifferenceTypeT n) const { static_assert(IsRandomAccess, "Subscripting is only defined for random access iterators."); - return ReferenceProxy(static_cast(this)->operator+(n)); + return static_cast(this)->operator+(n); } }; @@ -330,8 +354,7 @@ public: explicit pointer_iterator(WrappedIteratorT u) : pointer_iterator::iterator_adaptor_base(std::move(u)) {} - T &operator*() { return Ptr = &*this->I; } - const T &operator*() const { return Ptr = &*this->I; } + T &operator*() const { return Ptr = &*this->I; } }; template (ModRefInfo::ModRef)); } +/// Virtual base class for providers of capture information. +struct CaptureInfo { + virtual ~CaptureInfo() = 0; + virtual bool isNotCapturedBeforeOrAt(const Value *Object, + const Instruction *I) = 0; +}; + +/// Context-free CaptureInfo provider, which computes and caches whether an +/// object is captured in the function at all, but does not distinguish whether +/// it was captured before or after the context instruction. +class SimpleCaptureInfo final : public CaptureInfo { + SmallDenseMap IsCapturedCache; + +public: + bool isNotCapturedBeforeOrAt(const Value *Object, + const Instruction *I) override; +}; + +/// Context-sensitive CaptureInfo provider, which computes and caches the +/// earliest common dominator closure of all captures. It provides a good +/// approximation to a precise "captures before" analysis. +class EarliestEscapeInfo final : public CaptureInfo { + DominatorTree &DT; + const LoopInfo &LI; + + /// Map from identified local object to an instruction before which it does + /// not escape, or nullptr if it never escapes. The "earliest" instruction + /// may be a conservative approximation, e.g. the first instruction in the + /// function is always a legal choice. + DenseMap EarliestEscapes; + + /// Reverse map from instruction to the objects it is the earliest escape for. + /// This is used for cache invalidation purposes. + DenseMap> Inst2Obj; + +public: + EarliestEscapeInfo(DominatorTree &DT, const LoopInfo &LI) : DT(DT), LI(LI) {} + + bool isNotCapturedBeforeOrAt(const Value *Object, + const Instruction *I) override; + + void removeInstruction(Instruction *I); +}; + /// Reduced version of MemoryLocation that only stores a pointer and size. /// Used for caching AATags independent BasicAA results. 
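A rough sketch of how the new capture-info providers are meant to be queried; the dominator tree and loop info are assumed to come from the usual analyses, and `Object` should be an identified local object:

```
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Dominators.h"

// Returns true if Object is known not to be captured before or at I,
// using the context-sensitive earliest-escape provider.
bool notCapturedAt(const llvm::Value *Object, const llvm::Instruction *I,
                   llvm::DominatorTree &DT, const llvm::LoopInfo &LI) {
  llvm::EarliestEscapeInfo EEI(DT, LI);
  return EEI.isNotCapturedBeforeOrAt(Object, I);
}
```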
struct AACacheLoc { @@ -425,8 +470,7 @@ public: using AliasCacheT = SmallDenseMap; AliasCacheT AliasCache; - using IsCapturedCacheT = SmallDenseMap; - IsCapturedCacheT IsCapturedCache; + CaptureInfo *CI; /// Query depth used to distinguish recursive queries. unsigned Depth = 0; @@ -439,18 +483,26 @@ public: /// assumption is disproven. SmallVector AssumptionBasedResults; - AAQueryInfo() : AliasCache(), IsCapturedCache() {} + AAQueryInfo(CaptureInfo *CI) : CI(CI) {} /// Create a new AAQueryInfo based on this one, but with the cache cleared. /// This is used for recursive queries across phis, where cache results may /// not be valid. AAQueryInfo withEmptyCache() { - AAQueryInfo NewAAQI; + AAQueryInfo NewAAQI(CI); NewAAQI.Depth = Depth; return NewAAQI; } }; +/// AAQueryInfo that uses SimpleCaptureInfo. +class SimpleAAQueryInfo : public AAQueryInfo { + SimpleCaptureInfo CI; + +public: + SimpleAAQueryInfo() : AAQueryInfo(&CI) {} +}; + class BatchAAResults; class AAResults { @@ -770,7 +822,7 @@ public: /// helpers above. ModRefInfo getModRefInfo(const Instruction *I, const Optional &OptLoc) { - AAQueryInfo AAQIP; + SimpleAAQueryInfo AAQIP; return getModRefInfo(I, OptLoc, AAQIP); } @@ -797,7 +849,7 @@ public: ModRefInfo callCapturesBefore(const Instruction *I, const MemoryLocation &MemLoc, DominatorTree *DT) { - AAQueryInfo AAQIP; + SimpleAAQueryInfo AAQIP; return callCapturesBefore(I, MemLoc, DT, AAQIP); } @@ -896,9 +948,12 @@ private: class BatchAAResults { AAResults &AA; AAQueryInfo AAQI; + SimpleCaptureInfo SimpleCI; public: - BatchAAResults(AAResults &AAR) : AA(AAR), AAQI() {} + BatchAAResults(AAResults &AAR) : AA(AAR), AAQI(&SimpleCI) {} + BatchAAResults(AAResults &AAR, CaptureInfo *CI) : AA(AAR), AAQI(CI) {} + AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB) { return AA.alias(LocA, LocB, AAQI); } diff --git a/llvm/include/llvm/Analysis/AssumeBundleQueries.h b/llvm/include/llvm/Analysis/AssumeBundleQueries.h index 49c0cd89a4db..77da19110246 100644 --- a/llvm/include/llvm/Analysis/AssumeBundleQueries.h +++ b/llvm/include/llvm/Analysis/AssumeBundleQueries.h @@ -20,7 +20,6 @@ #include "llvm/ADT/DenseMap.h" namespace llvm { -class IntrinsicInst; class AssumptionCache; class DominatorTree; @@ -70,15 +69,15 @@ template<> struct DenseMapInfo { using RetainedKnowledgeKey = std::pair; struct MinMax { - unsigned Min; - unsigned Max; + uint64_t Min; + uint64_t Max; }; /// A mapping from intrinsics (=`llvm.assume` calls) to a value range /// (=knowledge) that is encoded in them. How the value range is interpreted /// depends on the RetainedKnowledgeKey that was used to get this out of the /// RetainedKnowledgeMap. -using Assume2KnowledgeMap = DenseMap; +using Assume2KnowledgeMap = DenseMap; using RetainedKnowledgeMap = DenseMap; @@ -100,7 +99,7 @@ void fillMapFromAssume(AssumeInst &Assume, RetainedKnowledgeMap &Result); /// - ArgValue will be 4. 
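And a companion sketch of plugging a provider into `BatchAAResults` through the new constructor (again assuming the analyses are available from the caller):

```
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Dominators.h"

void batchedQueries(llvm::AAResults &AA, llvm::DominatorTree &DT,
                    const llvm::LoopInfo &LI, const llvm::MemoryLocation &A,
                    const llvm::MemoryLocation &B) {
  // Context-sensitive capture information shared by all batched queries.
  llvm::EarliestEscapeInfo EEI(DT, LI);
  llvm::BatchAAResults BatchAA(AA, &EEI);
  (void)BatchAA.alias(A, B);

  // The single-argument form keeps the previous behaviour and uses an
  // internal SimpleCaptureInfo.
  llvm::BatchAAResults DefaultAA(AA);
  (void)DefaultAA.alias(A, B);
}
```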
struct RetainedKnowledge { Attribute::AttrKind AttrKind = Attribute::None; - unsigned ArgValue = 0; + uint64_t ArgValue = 0; Value *WasOn = nullptr; bool operator==(RetainedKnowledge Other) const { return AttrKind == Other.AttrKind && WasOn == Other.WasOn && diff --git a/llvm/include/llvm/Analysis/AssumptionCache.h b/llvm/include/llvm/Analysis/AssumptionCache.h index 51d04bd8cf02..12dd9b04c932 100644 --- a/llvm/include/llvm/Analysis/AssumptionCache.h +++ b/llvm/include/llvm/Analysis/AssumptionCache.h @@ -29,6 +29,7 @@ namespace llvm { class AssumeInst; class Function; class raw_ostream; +class TargetTransformInfo; class Value; /// A cache of \@llvm.assume calls within a function. @@ -59,6 +60,8 @@ private: /// We track this to lazily populate our assumptions. Function &F; + TargetTransformInfo *TTI; + /// Vector of weak value handles to calls of the \@llvm.assume /// intrinsic. SmallVector AssumeHandles; @@ -103,7 +106,8 @@ private: public: /// Construct an AssumptionCache from a function by scanning all of /// its instructions. - AssumptionCache(Function &F) : F(F) {} + AssumptionCache(Function &F, TargetTransformInfo *TTI = nullptr) + : F(F), TTI(TTI) {} /// This cache is designed to be self-updating and so it should never be /// invalidated. @@ -174,9 +178,7 @@ class AssumptionAnalysis : public AnalysisInfoMixin { public: using Result = AssumptionCache; - AssumptionCache run(Function &F, FunctionAnalysisManager &) { - return AssumptionCache(F); - } + AssumptionCache run(Function &F, FunctionAnalysisManager &); }; /// Printer pass for the \c AssumptionAnalysis results. diff --git a/llvm/include/llvm/Analysis/BasicAliasAnalysis.h b/llvm/include/llvm/Analysis/BasicAliasAnalysis.h index 991c0cbb642a..ed9d1ba4c5a7 100644 --- a/llvm/include/llvm/Analysis/BasicAliasAnalysis.h +++ b/llvm/include/llvm/Analysis/BasicAliasAnalysis.h @@ -13,10 +13,8 @@ #ifndef LLVM_ANALYSIS_BASICALIASANALYSIS_H #define LLVM_ANALYSIS_BASICALIASANALYSIS_H -#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/IR/PassManager.h" #include "llvm/Pass.h" @@ -28,7 +26,6 @@ namespace llvm { struct AAMDNodes; -class APInt; class AssumptionCache; class BasicBlock; class DataLayout; @@ -98,71 +95,7 @@ public: FunctionModRefBehavior getModRefBehavior(const Function *Fn); private: - // A linear transformation of a Value; this class represents ZExt(SExt(V, - // SExtBits), ZExtBits) * Scale + Offset. - struct VariableGEPIndex { - // An opaque Value - we can't decompose this further. - const Value *V; - - // We need to track what extensions we've done as we consider the same Value - // with different extensions as different variables in a GEP's linear - // expression; - // e.g.: if V == -1, then sext(x) != zext(x). - unsigned ZExtBits; - unsigned SExtBits; - - APInt Scale; - - // Context instruction to use when querying information about this index. - const Instruction *CxtI; - - /// True if all operations in this expression are NSW. - bool IsNSW; - - void dump() const { - print(dbgs()); - dbgs() << "\n"; - } - void print(raw_ostream &OS) const { - OS << "(V=" << V->getName() - << ", zextbits=" << ZExtBits - << ", sextbits=" << SExtBits - << ", scale=" << Scale << ")"; - } - }; - - // Represents the internal structure of a GEP, decomposed into a base pointer, - // constant offsets, and variable scaled indices. 
- struct DecomposedGEP { - // Base pointer of the GEP - const Value *Base; - // Total constant offset from base. - APInt Offset; - // Scaled variable (non-constant) indices. - SmallVector VarIndices; - // Is GEP index scale compile-time constant. - bool HasCompileTimeConstantScale; - // Are all operations inbounds GEPs or non-indexing operations? - // (None iff expression doesn't involve any geps) - Optional InBounds; - - void dump() const { - print(dbgs()); - dbgs() << "\n"; - } - void print(raw_ostream &OS) const { - OS << "(DecomposedGEP Base=" << Base->getName() - << ", Offset=" << Offset - << ", VarIndices=["; - for (size_t i = 0; i < VarIndices.size(); i++) { - if (i != 0) - OS << ", "; - VarIndices[i].print(OS); - } - OS << "], HasCompileTimeConstantScale=" << HasCompileTimeConstantScale - << ")"; - } - }; + struct DecomposedGEP; /// Tracks phi nodes we have visited. /// @@ -187,10 +120,6 @@ private: DecomposeGEPExpression(const Value *V, const DataLayout &DL, AssumptionCache *AC, DominatorTree *DT); - static bool isGEPBaseAtNegativeOffset(const GEPOperator *GEPOp, - const DecomposedGEP &DecompGEP, const DecomposedGEP &DecompObject, - LocationSize ObjectAccessSize); - /// A Heuristic for aliasGEP that searches for a constant offset /// between the variables. /// @@ -200,15 +129,14 @@ private: /// However, we know that, for all %x, zext(%x) != zext(%x + 1), even if /// the addition overflows. bool - constantOffsetHeuristic(const SmallVectorImpl &VarIndices, - LocationSize V1Size, LocationSize V2Size, - const APInt &BaseOffset, AssumptionCache *AC, + constantOffsetHeuristic(const DecomposedGEP &GEP, LocationSize V1Size, + LocationSize V2Size, AssumptionCache *AC, DominatorTree *DT); bool isValueEqualInPotentialCycles(const Value *V1, const Value *V2); - void GetIndexDifference(SmallVectorImpl &Dest, - const SmallVectorImpl &Src); + void subtractDecomposedGEPs(DecomposedGEP &DestGEP, + const DecomposedGEP &SrcGEP); AliasResult aliasGEP(const GEPOperator *V1, LocationSize V1Size, const Value *V2, LocationSize V2Size, diff --git a/llvm/include/llvm/Analysis/CGSCCPassManager.h b/llvm/include/llvm/Analysis/CGSCCPassManager.h index e361cccef960..7cf172dc1dd1 100644 --- a/llvm/include/llvm/Analysis/CGSCCPassManager.h +++ b/llvm/include/llvm/Analysis/CGSCCPassManager.h @@ -20,7 +20,7 @@ /// A secondary more general goal is to be able to isolate optimization on /// unrelated parts of the IR module. This is useful to ensure our /// optimizations are principled and don't miss oportunities where refinement -/// of one part of the module influence transformations in another part of the +/// of one part of the module influences transformations in another part of the /// module. But this is also useful if we want to parallelize the optimizations /// across common large module graph shapes which tend to be very wide and have /// large regions of unrelated cliques. @@ -161,6 +161,12 @@ struct RequireAnalysisPass(C, CG); return PreservedAnalyses::all(); } + void printPipeline(raw_ostream &OS, + function_ref MapClassName2PassName) { + auto ClassName = AnalysisT::name(); + auto PassName = MapClassName2PassName(ClassName); + OS << "require<" << PassName << ">"; + } }; /// A proxy from a \c CGSCCAnalysisManager to a \c Module. @@ -215,7 +221,7 @@ using ModuleAnalysisManagerCGSCCProxy = LazyCallGraph &>; /// Support structure for SCC passes to communicate updates the call graph back -/// to the CGSCC pass manager infrsatructure. +/// to the CGSCC pass manager infrastructure. 
/// /// The CGSCC pass manager runs SCC passes which are allowed to update the call /// graph and SCC structures. This means the structure the pass manager works @@ -274,22 +280,22 @@ struct CGSCCUpdateResult { /// If non-null, the updated current \c RefSCC being processed. /// - /// This is set when a graph refinement takes place an the "current" point in - /// the graph moves "down" or earlier in the post-order walk. This will often - /// cause the "current" RefSCC to be a newly created RefSCC object and the - /// old one to be added to the above worklist. When that happens, this + /// This is set when a graph refinement takes place and the "current" point + /// in the graph moves "down" or earlier in the post-order walk. This will + /// often cause the "current" RefSCC to be a newly created RefSCC object and + /// the old one to be added to the above worklist. When that happens, this /// pointer is non-null and can be used to continue processing the "top" of /// the post-order walk. LazyCallGraph::RefSCC *UpdatedRC; /// If non-null, the updated current \c SCC being processed. /// - /// This is set when a graph refinement takes place an the "current" point in - /// the graph moves "down" or earlier in the post-order walk. This will often - /// cause the "current" SCC to be a newly created SCC object and the old one - /// to be added to the above worklist. When that happens, this pointer is - /// non-null and can be used to continue processing the "top" of the - /// post-order walk. + /// This is set when a graph refinement takes place and the "current" point + /// in the graph moves "down" or earlier in the post-order walk. This will + /// often cause the "current" SCC to be a newly created SCC object and the + /// old one to be added to the above worklist. When that happens, this + /// pointer is non-null and can be used to continue processing the "top" of + /// the post-order walk. LazyCallGraph::SCC *UpdatedC; /// Preserved analyses across SCCs. @@ -298,7 +304,7 @@ struct CGSCCUpdateResult { /// (changing both the CG structure and the function IR itself). However, /// this means we need to take special care to correctly mark what analyses /// are preserved *across* SCCs. We have to track this out-of-band here - /// because within the main `PassManeger` infrastructure we need to mark + /// because within the main `PassManager` infrastructure we need to mark /// everything within an SCC as preserved in order to avoid repeatedly /// invalidating the same analyses as we unnest pass managers and adaptors. /// So we track the cross-SCC version of the preserved analyses here from any @@ -363,6 +369,13 @@ public: /// Runs the CGSCC pass across every SCC in the module. PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); + void printPipeline(raw_ostream &OS, + function_ref MapClassName2PassName) { + OS << "cgscc("; + Pass->printPipeline(OS, MapClassName2PassName); + OS << ")"; + } + static bool isRequired() { return true; } private: @@ -377,8 +390,11 @@ createModuleToPostOrderCGSCCPassAdaptor(CGSCCPassT &&Pass) { using PassModelT = detail::PassModel; + // Do not use make_unique, it causes too many template instantiations, + // causing terrible compile times. return ModuleToPostOrderCGSCCPassAdaptor( - std::make_unique(std::forward(Pass))); + std::unique_ptr( + new PassModelT(std::forward(Pass)))); } /// A proxy from a \c FunctionAnalysisManager to an \c SCC. 
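As a rough illustration of what the `printPipeline` overloads being added here compose into, the following sketch renders a populated module pipeline back to its textual form; the exact output depends on the passes added, something like `cgscc(function(...))` for a module pipeline wrapping CGSCC and function adaptors:

```
#include "llvm/IR/PassManager.h"
#include "llvm/Support/raw_ostream.h"
#include <string>

void dumpPipeline(llvm::ModulePassManager &MPM) {
  std::string Text;
  llvm::raw_string_ostream OS(Text);
  // The callback may map internal class names to user-facing pass names;
  // the identity mapping is used here for simplicity.
  MPM.printPipeline(OS, [](llvm::StringRef Name) { return Name; });
  llvm::errs() << OS.str() << "\n";
}
```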
@@ -461,11 +477,14 @@ class CGSCCToFunctionPassAdaptor public: using PassConceptT = detail::PassConcept; - explicit CGSCCToFunctionPassAdaptor(std::unique_ptr Pass) - : Pass(std::move(Pass)) {} + explicit CGSCCToFunctionPassAdaptor(std::unique_ptr Pass, + bool EagerlyInvalidate, bool NoRerun) + : Pass(std::move(Pass)), EagerlyInvalidate(EagerlyInvalidate), + NoRerun(NoRerun) {} CGSCCToFunctionPassAdaptor(CGSCCToFunctionPassAdaptor &&Arg) - : Pass(std::move(Arg.Pass)) {} + : Pass(std::move(Arg.Pass)), EagerlyInvalidate(Arg.EagerlyInvalidate), + NoRerun(Arg.NoRerun) {} friend void swap(CGSCCToFunctionPassAdaptor &LHS, CGSCCToFunctionPassAdaptor &RHS) { @@ -481,24 +500,56 @@ public: PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM, LazyCallGraph &CG, CGSCCUpdateResult &UR); + void printPipeline(raw_ostream &OS, + function_ref MapClassName2PassName) { + OS << "function"; + if (EagerlyInvalidate) + OS << ""; + OS << "("; + Pass->printPipeline(OS, MapClassName2PassName); + OS << ")"; + } + static bool isRequired() { return true; } private: std::unique_ptr Pass; + bool EagerlyInvalidate; + bool NoRerun; }; /// A function to deduce a function pass type and wrap it in the /// templated adaptor. template CGSCCToFunctionPassAdaptor -createCGSCCToFunctionPassAdaptor(FunctionPassT &&Pass) { +createCGSCCToFunctionPassAdaptor(FunctionPassT &&Pass, + bool EagerlyInvalidate = false, + bool NoRerun = false) { using PassModelT = detail::PassModel; + // Do not use make_unique, it causes too many template instantiations, + // causing terrible compile times. return CGSCCToFunctionPassAdaptor( - std::make_unique(std::forward(Pass))); + std::unique_ptr( + new PassModelT(std::forward(Pass))), + EagerlyInvalidate, NoRerun); } +// A marker to determine if function passes should be run on a function within a +// CGSCCToFunctionPassAdaptor. This is used to prevent running an expensive +// function pass (manager) on a function multiple times if SCC mutations cause a +// function to be visited multiple times and the function is not modified by +// other SCC passes. +class ShouldNotRunFunctionPassesAnalysis + : public AnalysisInfoMixin { +public: + static AnalysisKey Key; + struct Result {}; + + Result run(Function &F, FunctionAnalysisManager &FAM) { return Result(); } +}; + /// A helper that repeats an SCC pass each time an indirect call is refined to /// a direct call by that pass. /// @@ -528,6 +579,13 @@ public: PreservedAnalyses run(LazyCallGraph::SCC &InitialC, CGSCCAnalysisManager &AM, LazyCallGraph &CG, CGSCCUpdateResult &UR); + void printPipeline(raw_ostream &OS, + function_ref MapClassName2PassName) { + OS << "devirt<" << MaxIterations << ">("; + Pass->printPipeline(OS, MapClassName2PassName); + OS << ")"; + } + private: std::unique_ptr Pass; int MaxIterations; @@ -541,8 +599,11 @@ DevirtSCCRepeatedPass createDevirtSCCRepeatedPass(CGSCCPassT &&Pass, using PassModelT = detail::PassModel; + // Do not use make_unique, it causes too many template instantiations, + // causing terrible compile times. 
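A sketch of the adaptor factories with the new flags; the wrapped function pass is left as a template parameter since any function pass works here:

```
#include "llvm/Analysis/CGSCCPassManager.h"
#include <utility>

template <typename FunctionPassT>
llvm::ModulePassManager buildPipeline(FunctionPassT &&FP) {
  llvm::ModulePassManager MPM;
  // Run FP over every function of every SCC in post-order, eagerly
  // invalidating function analyses afterwards.
  MPM.addPass(llvm::createModuleToPostOrderCGSCCPassAdaptor(
      llvm::createCGSCCToFunctionPassAdaptor(
          std::forward<FunctionPassT>(FP),
          /*EagerlyInvalidate=*/true, /*NoRerun=*/false)));
  return MPM;
}
```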
return DevirtSCCRepeatedPass( - std::make_unique(std::forward(Pass)), + std::unique_ptr( + new PassModelT(std::forward(Pass))), MaxIterations); } diff --git a/llvm/include/llvm/Analysis/CaptureTracking.h b/llvm/include/llvm/Analysis/CaptureTracking.h index 9da5f18e944b..50d12db7a1c3 100644 --- a/llvm/include/llvm/Analysis/CaptureTracking.h +++ b/llvm/include/llvm/Analysis/CaptureTracking.h @@ -22,6 +22,8 @@ namespace llvm { class DataLayout; class Instruction; class DominatorTree; + class LoopInfo; + class Function; /// getDefaultMaxUsesToExploreForCaptureTracking - Return default value of /// the maximal number of uses to explore before giving up. It is used by @@ -55,10 +57,25 @@ namespace llvm { /// MaxUsesToExplore specifies how many uses the analysis should explore for /// one value before giving up due too "too many uses". If MaxUsesToExplore /// is zero, a default value is assumed. - bool PointerMayBeCapturedBefore( - const Value *V, bool ReturnCaptures, bool StoreCaptures, - const Instruction *I, const DominatorTree *DT, bool IncludeI = false, - unsigned MaxUsesToExplore = 0); + bool PointerMayBeCapturedBefore(const Value *V, bool ReturnCaptures, + bool StoreCaptures, const Instruction *I, + const DominatorTree *DT, + bool IncludeI = false, + unsigned MaxUsesToExplore = 0, + const LoopInfo *LI = nullptr); + + // Returns the 'earliest' instruction that captures \p V in \F. An instruction + // A is considered earlier than instruction B, if A dominates B. If 2 escapes + // do not dominate each other, the terminator of the common dominator is + // chosen. If not all uses can be analyzed, the earliest escape is set to + // the first instruction in the function entry block. If \p V does not escape, + // nullptr is returned. Note that the caller of the function has to ensure + // that the instruction the result value is compared against is not in a + // cycle. + Instruction *FindEarliestCapture(const Value *V, Function &F, + bool ReturnCaptures, bool StoreCaptures, + const DominatorTree &DT, + unsigned MaxUsesToExplore = 0); /// This callback is used in conjunction with PointerMayBeCaptured. In /// addition to the interface here, you'll need to provide your own getters diff --git a/llvm/include/llvm/Analysis/ConstantFolding.h b/llvm/include/llvm/Analysis/ConstantFolding.h index 62742fdf9a91..45fb879f0c1f 100644 --- a/llvm/include/llvm/Analysis/ConstantFolding.h +++ b/llvm/include/llvm/Analysis/ConstantFolding.h @@ -128,10 +128,25 @@ Constant *ConstantFoldExtractElementInstruction(Constant *Val, Constant *Idx); Constant *ConstantFoldShuffleVectorInstruction(Constant *V1, Constant *V2, ArrayRef Mask); -/// ConstantFoldLoadFromConstPtr - Return the value that a load from C would -/// produce if it is constant and determinable. If this is not determinable, -/// return null. -Constant *ConstantFoldLoadFromConstPtr(Constant *C, Type *Ty, const DataLayout &DL); +/// Extract value of C at the given Offset reinterpreted as Ty. If bits past +/// the end of C are accessed, they are assumed to be poison. +Constant *ConstantFoldLoadFromConst(Constant *C, Type *Ty, const APInt &Offset, + const DataLayout &DL); + +/// Extract value of C reinterpreted as Ty. Same as previous API with zero +/// offset. +Constant *ConstantFoldLoadFromConst(Constant *C, Type *Ty, + const DataLayout &DL); + +/// Return the value that a load from C with offset Offset would produce if it +/// is constant and determinable. If this is not determinable, return null. 
+Constant *ConstantFoldLoadFromConstPtr(Constant *C, Type *Ty, APInt Offset, + const DataLayout &DL); + +/// Return the value that a load from C would produce if it is constant and +/// determinable. If this is not determinable, return null. +Constant *ConstantFoldLoadFromConstPtr(Constant *C, Type *Ty, + const DataLayout &DL); /// ConstantFoldLoadThroughGEPConstantExpr - Given a constant and a /// getelementptr constantexpr, return the constant value being addressed by the @@ -140,13 +155,6 @@ Constant *ConstantFoldLoadThroughGEPConstantExpr(Constant *C, ConstantExpr *CE, Type *Ty, const DataLayout &DL); -/// ConstantFoldLoadThroughGEPIndices - Given a constant and getelementptr -/// indices (with an *implied* zero pointer index that is not in the list), -/// return the constant value being addressed by a virtual load, or null if -/// something is funny and we can't decide. -Constant *ConstantFoldLoadThroughGEPIndices(Constant *C, - ArrayRef Indices); - /// canConstantFoldCallTo - Return true if its even possible to fold a call to /// the specified function. bool canConstantFoldCallTo(const CallBase *Call, const Function *F); diff --git a/llvm/include/llvm/Analysis/CostModel.h b/llvm/include/llvm/Analysis/CostModel.h new file mode 100644 index 000000000000..649168050cec --- /dev/null +++ b/llvm/include/llvm/Analysis/CostModel.h @@ -0,0 +1,26 @@ +//===- CostModel.h - --------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_COSTMODEL_H +#define LLVM_ANALYSIS_COSTMODEL_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { +/// Printer pass for cost modeling results. +class CostModelPrinterPass : public PassInfoMixin { + raw_ostream &OS; + +public: + explicit CostModelPrinterPass(raw_ostream &OS) : OS(OS) {} + + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; +} // end namespace llvm + +#endif // LLVM_ANALYSIS_COSTMODEL_H diff --git a/llvm/include/llvm/Analysis/Delinearization.h b/llvm/include/llvm/Analysis/Delinearization.h index 2658b6bbc80c..6e942530f253 100644 --- a/llvm/include/llvm/Analysis/Delinearization.h +++ b/llvm/include/llvm/Analysis/Delinearization.h @@ -16,10 +16,115 @@ #ifndef LLVM_ANALYSIS_DELINEARIZATION_H #define LLVM_ANALYSIS_DELINEARIZATION_H +#include "llvm/ADT/SmallVector.h" #include "llvm/IR/PassManager.h" #include "llvm/Support/raw_ostream.h" namespace llvm { +class GetElementPtrInst; +class ScalarEvolution; +class SCEV; + +/// Compute the array dimensions Sizes from the set of Terms extracted from +/// the memory access function of this SCEVAddRecExpr (second step of +/// delinearization). +void findArrayDimensions(ScalarEvolution &SE, + SmallVectorImpl &Terms, + SmallVectorImpl &Sizes, + const SCEV *ElementSize); + +/// Collect parametric terms occurring in step expressions (first step of +/// delinearization). +void collectParametricTerms(ScalarEvolution &SE, const SCEV *Expr, + SmallVectorImpl &Terms); + +/// Return in Subscripts the access functions for each dimension in Sizes +/// (third step of delinearization). 
+void computeAccessFunctions(ScalarEvolution &SE, const SCEV *Expr, + SmallVectorImpl &Subscripts, + SmallVectorImpl &Sizes); +/// Split this SCEVAddRecExpr into two vectors of SCEVs representing the +/// subscripts and sizes of an array access. +/// +/// The delinearization is a 3 step process: the first two steps compute the +/// sizes of each subscript and the third step computes the access functions +/// for the delinearized array: +/// +/// 1. Find the terms in the step functions +/// 2. Compute the array size +/// 3. Compute the access function: divide the SCEV by the array size +/// starting with the innermost dimensions found in step 2. The Quotient +/// is the SCEV to be divided in the next step of the recursion. The +/// Remainder is the subscript of the innermost dimension. Loop over all +/// array dimensions computed in step 2. +/// +/// To compute a uniform array size for several memory accesses to the same +/// object, one can collect in step 1 all the step terms for all the memory +/// accesses, and compute in step 2 a unique array shape. This guarantees +/// that the array shape will be the same across all memory accesses. +/// +/// FIXME: We could derive the result of steps 1 and 2 from a description of +/// the array shape given in metadata. +/// +/// Example: +/// +/// A[][n][m] +/// +/// for i +/// for j +/// for k +/// A[j+k][2i][5i] = +/// +/// The initial SCEV: +/// +/// A[{{{0,+,2*m+5}_i, +, n*m}_j, +, n*m}_k] +/// +/// 1. Find the different terms in the step functions: +/// -> [2*m, 5, n*m, n*m] +/// +/// 2. Compute the array size: sort and unique them +/// -> [n*m, 2*m, 5] +/// find the GCD of all the terms = 1 +/// divide by the GCD and erase constant terms +/// -> [n*m, 2*m] +/// GCD = m +/// divide by GCD -> [n, 2] +/// remove constant terms +/// -> [n] +/// size of the array is A[unknown][n][m] +/// +/// 3. Compute the access function +/// a. Divide {{{0,+,2*m+5}_i, +, n*m}_j, +, n*m}_k by the innermost size m +/// Quotient: {{{0,+,2}_i, +, n}_j, +, n}_k +/// Remainder: {{{0,+,5}_i, +, 0}_j, +, 0}_k +/// The remainder is the subscript of the innermost array dimension: [5i]. +/// +/// b. Divide Quotient: {{{0,+,2}_i, +, n}_j, +, n}_k by next outer size n +/// Quotient: {{{0,+,0}_i, +, 1}_j, +, 1}_k +/// Remainder: {{{0,+,2}_i, +, 0}_j, +, 0}_k +/// The Remainder is the subscript of the next array dimension: [2i]. +/// +/// The subscript of the outermost dimension is the Quotient: [j+k]. +/// +/// Overall, we have: A[][n][m], and the access function: A[j+k][2i][5i]. +void delinearize(ScalarEvolution &SE, const SCEV *Expr, + SmallVectorImpl &Subscripts, + SmallVectorImpl &Sizes, const SCEV *ElementSize); + +/// Gathers the individual index expressions from a GEP instruction. +/// +/// This function optimistically assumes the GEP references into a fixed size +/// array. If this is actually true, this function returns a list of array +/// subscript expressions in \p Subscripts and a list of integers describing +/// the size of the individual array dimensions in \p Sizes. Both lists have +/// either equal length or the size list is one element shorter in case there +/// is no known size available for the outermost array dimension. Returns true +/// if successful and false otherwise. 
+bool getIndexExpressionsFromGEP(ScalarEvolution &SE, + const GetElementPtrInst *GEP, + SmallVectorImpl &Subscripts, + SmallVectorImpl &Sizes); + struct DelinearizationPrinterPass : public PassInfoMixin { explicit DelinearizationPrinterPass(raw_ostream &OS); diff --git a/llvm/include/llvm/Analysis/HeatUtils.h b/llvm/include/llvm/Analysis/HeatUtils.h index b665e211c6ac..9ecbbaf318da 100644 --- a/llvm/include/llvm/Analysis/HeatUtils.h +++ b/llvm/include/llvm/Analysis/HeatUtils.h @@ -1,9 +1,8 @@ //===-- HeatUtils.h - Utility for printing heat colors ----------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h index b623b9ca58d8..51c5c620230b 100644 --- a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h +++ b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h @@ -110,7 +110,8 @@ enum InstrType { Legal, Illegal, Invisible }; /// by \ref isSameOperationAs. /// TODO: Handle GetElementPtrInsts, as some of the operands have to be the /// exact same, and some do not. -struct IRInstructionData : ilist_node { +struct IRInstructionData + : ilist_node> { /// The source Instruction that is being wrapped. Instruction *Inst = nullptr; @@ -127,12 +128,41 @@ struct IRInstructionData : ilist_node { /// to a less than form. It is None otherwise. Optional RevisedPredicate; + /// This structure holds the distances of how far "ahead of" or "behind" the + /// target blocks of a branch, or the incoming blocks of a phi nodes are. + /// If the value is negative, it means that the block was registered before + /// the block of this instruction in terms of blocks in the function. + /// Code Example: + /// \code + /// block_1: + /// br i1 %0, label %block_2, label %block_3 + /// block_2: + /// br i1 %1, label %block_1, label %block_2 + /// block_3: + /// br i1 %2, label %block_2, label %block_1 + /// ; Replacing the labels with relative values, this becomes: + /// block_1: + /// br i1 %0, distance 1, distance 2 + /// block_2: + /// br i1 %1, distance -1, distance 0 + /// block_3: + /// br i1 %2, distance -1, distance -2 + /// \endcode + /// Taking block_2 as our example, block_1 is "behind" block_2, and block_2 is + /// "ahead" of block_2. + SmallVector RelativeBlockLocations; + /// Gather the information that is difficult to gather for an Instruction, or /// is changed. i.e. the operands of an Instruction and the Types of those /// operands. This extra information allows for similarity matching to make /// assertions that allow for more flexibility when checking for whether an /// Instruction performs the same operation. IRInstructionData(Instruction &I, bool Legality, IRInstructionDataList &IDL); + IRInstructionData(IRInstructionDataList &IDL); + + /// Fills data stuctures for IRInstructionData when it is constructed from a + // reference or a pointer. + void initializeInstruction(); /// Get the predicate that the compare instruction is using for hashing the /// instruction. the IRInstructionData must be wrapping a CmpInst. 
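The relative branch-target encoding documented above for RelativeBlockLocations can be summarized with a small sketch; the helper name relativeTargetLocations is hypothetical and only stands in for the kind of computation setBranchSuccessors performs with the mapper's basic block numbering.

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instructions.h"

// Hypothetical helper: compute the signed distance from a branch's block to
// each of its successors, given a per-module block numbering.
llvm::SmallVector<int, 2>
relativeTargetLocations(llvm::BranchInst &BI,
                        const llvm::DenseMap<llvm::BasicBlock *, unsigned>
                            &BasicBlockToInteger) {
  llvm::SmallVector<int, 2> Relative;
  int Current = static_cast<int>(BasicBlockToInteger.lookup(BI.getParent()));
  for (unsigned Idx = 0, E = BI.getNumSuccessors(); Idx != E; ++Idx) {
    int Target =
        static_cast<int>(BasicBlockToInteger.lookup(BI.getSuccessor(Idx)));
    // Negative means the successor was numbered before the branch's block,
    // matching the convention described for RelativeBlockLocations.
    Relative.push_back(Target - Current);
  }
  return Relative;
}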
@@ -145,6 +175,16 @@ struct IRInstructionData : ilist_node { /// \return the consistent comparison predicate. static CmpInst::Predicate predicateForConsistency(CmpInst *CI); + /// For an IRInstructionData containing a branch, finds the + /// relative distances from the source basic block to the target by taking + /// the difference of the number assigned to the current basic block and the + /// target basic block of the branch. + /// + /// \param BasicBlockToInteger - The mapping of basic blocks to their location + /// in the module. + void + setBranchSuccessors(DenseMap &BasicBlockToInteger); + /// Hashes \p Value based on its opcode, types, and operand types. /// Two IRInstructionData instances produce the same hash when they perform /// the same operation. @@ -198,7 +238,8 @@ struct IRInstructionData : ilist_node { IRInstructionDataList *IDL = nullptr; }; -struct IRInstructionDataList : simple_ilist {}; +struct IRInstructionDataList + : simple_ilist> {}; /// Compare one IRInstructionData class to another IRInstructionData class for /// whether they are performing a the same operation, and can mapped to the @@ -288,6 +329,10 @@ struct IRInstructionMapper { DenseMap InstructionIntegerMap; + /// A mapping for a basic block in a module to its assigned number/location + /// in the module. + DenseMap BasicBlockToInteger; + /// Set if we added an illegal number in the previous step. /// Since each illegal number is unique, we only need one of them between /// each range of legal numbers. This lets us make sure we don't add more @@ -322,6 +367,14 @@ struct IRInstructionMapper { IRInstructionData *allocateIRInstructionData(Instruction &I, bool Legality, IRInstructionDataList &IDL); + /// Get an empty allocated IRInstructionData struct using the + /// InstDataAllocator. + /// + /// \param IDL - The InstructionDataList that the IRInstructionData is + /// inserted into. + /// \returns An allocated IRInstructionData struct. + IRInstructionData *allocateIRInstructionData(IRInstructionDataList &IDL); + /// Get an allocated IRInstructionDataList object using the IDLAllocator. /// /// \returns An allocated IRInstructionDataList object. @@ -329,6 +382,24 @@ struct IRInstructionMapper { IRInstructionDataList *IDL = nullptr; + /// Assigns values to all the basic blocks in function \p F starting from + /// integer \p BBNumber. + /// + /// \param F - The function containing the basic blocks to assign numbers to. + /// \param BBNumber - The number to start from. + void initializeForBBs(Function &F, unsigned &BBNumber) { + for (BasicBlock &BB : F) + BasicBlockToInteger.insert(std::make_pair(&BB, BBNumber++)); + } + + /// Assigns values to all the basic blocks in Module \p M. + /// \param M - The module containing the basic blocks to assign numbers to. + void initializeForBBs(Module &M) { + unsigned BBNumber = 0; + for (Function &F : M) + initializeForBBs(F, BBNumber); + } + /// Maps the Instructions in a BasicBlock \p BB to legal or illegal integers /// determined by \p InstrType. Two Instructions are mapped to the same value /// if they are close as defined by the InstructionData class above. @@ -386,7 +457,11 @@ struct IRInstructionMapper { InstructionClassification() {} // TODO: Determine a scheme to resolve when the label is similar enough. - InstrType visitBranchInst(BranchInst &BI) { return Illegal; } + InstrType visitBranchInst(BranchInst &BI) { + if (EnableBranches) + return Legal; + return Illegal; + } // TODO: Determine a scheme to resolve when the labels are similar enough. 
InstrType visitPHINode(PHINode &PN) { return Illegal; } // TODO: Handle allocas. @@ -419,6 +494,10 @@ struct IRInstructionMapper { // TODO: Handle interblock similarity. InstrType visitTerminator(Instruction &I) { return Illegal; } InstrType visitInstruction(Instruction &I) { return Legal; } + + // The flag variable that lets the classifier know whether we should + // allow branches to be checked for similarity. + bool EnableBranches = false; }; /// Maps an Instruction to a member of InstrType. @@ -488,6 +567,12 @@ private: DenseMap ValueToNumber; /// Stores the mapping of the number to the value assigned this number. DenseMap NumberToValue; + /// Stores the mapping of a value's number to canonical numbering in the + /// candidate's respective similarity group. + DenseMap NumberToCanonNum; + /// Stores the mapping of canonical number in the candidate's respective + /// similarity group to a value number. + DenseMap CanonNumToNumber; /// @} public: @@ -506,13 +591,27 @@ public: static bool isSimilar(const IRSimilarityCandidate &A, const IRSimilarityCandidate &B); - /// \param A - The first IRInstructionCandidate to compare. - /// \param B - The second IRInstructionCandidate to compare. + /// \param [in] A - The first IRInstructionCandidate to compare. + /// \param [in] B - The second IRInstructionCandidate to compare. /// \returns True when every IRInstructionData in \p A is structurally similar /// to \p B. static bool compareStructure(const IRSimilarityCandidate &A, const IRSimilarityCandidate &B); + /// \param [in] A - The first IRInstructionCandidate to compare. + /// \param [in] B - The second IRInstructionCandidate to compare. + /// \param [in,out] ValueNumberMappingA - A mapping of value numbers from + /// candidate \p A to candidate \B. + /// \param [in,out] ValueNumberMappingB - A mapping of value numbers from + /// candidate \p B to candidate \A. + /// \returns True when every IRInstructionData in \p A is structurally similar + /// to \p B. + static bool + compareStructure(const IRSimilarityCandidate &A, + const IRSimilarityCandidate &B, + DenseMap> &ValueNumberMappingA, + DenseMap> &ValueNumberMappingB); + struct OperandMapping { /// The IRSimilarityCandidate that holds the instruction the OperVals were /// pulled from. @@ -526,6 +625,21 @@ public: DenseMap> &ValueNumberMapping; }; + /// A helper struct to hold the candidate, for a branch instruction, the + /// relative location of a label, and the label itself. This is mostly to + /// group the values together before passing them as a bundle to a function. + struct RelativeLocMapping { + /// The IRSimilarityCandidate that holds the instruction the relative + /// location was pulled from. + const IRSimilarityCandidate &IRSC; + + /// The relative location to be analyzed. + int RelativeLocation; + + /// The corresponding value. + Value *OperVal; + }; + /// Compare the operands in \p A and \p B and check that the current mapping /// of global value numbers from \p A to \p B and \p B to \A is consistent. /// @@ -549,6 +663,94 @@ public: static bool compareCommutativeOperandMapping(OperandMapping A, OperandMapping B); + /// Compare the relative locations in \p A and \p B and check that the + /// distances match if both locations are contained in the region, and that + /// the branches both point outside the region if they do not. 
+  /// Example Region:
+  /// \code
+  /// entry:
+  ///   br i1 %0, label %block_1, label %block_3
+  /// block_0:
+  ///   br i1 %0, label %block_1, label %block_2
+  /// block_1:
+  ///   br i1 %0, label %block_2, label %block_3
+  /// block_2:
+  ///   br i1 %1, label %block_1, label %block_4
+  /// block_3:
+  ///   br i1 %2, label %block_2, label %block_5
+  /// \endcode
+  /// If we compare the branches in block_0 and block_1 the relative values are
+  /// 1 and 2 for both, so we consider this a match.
+  ///
+  /// If we compare the branches in entry and block_0 the relative values are
+  /// 2 and 3, and 1 and 2 respectively. Since these are not the same we do not
+  /// consider them a match.
+  ///
+  /// If we compare the branches in block_1 and block_2 the relative values are
+  /// 1 and 2, and -1 and None respectively. As a result we do not consider
+  /// these to be the same.
+  ///
+  /// If we compare the branches in block_2 and block_3 the relative values are
+  /// -1 and None for both. We do consider these to be a match.
+  ///
+  /// \param A - The first IRInstructionCandidate, relative location value,
+  /// and incoming block.
+  /// \param B - The second IRInstructionCandidate, relative location value,
+  /// and incoming block.
+  /// \returns true if the relative locations match.
+  static bool checkRelativeLocations(RelativeLocMapping A,
+                                     RelativeLocMapping B);
+
+  /// Create a mapping from the value numbering to a different separate set of
+  /// numbers. This will serve as a guide for relating one candidate to another.
+  /// The canonical number gives us the ability to identify which global value
+  /// number in one candidate relates to the global value number in the other.
+  ///
+  /// \param [in, out] CurrCand - The IRSimilarityCandidate to create a
+  /// canonical numbering for.
+  static void createCanonicalMappingFor(IRSimilarityCandidate &CurrCand);
+
+  /// Create a mapping for the value numbering of the calling
+  /// IRSimilarityCandidate, to a different separate set of numbers, based on
+  /// the canonical ordering in \p SourceCand. These are defined based on the
+  /// found mappings in \p ToSourceMapping and \p FromSourceMapping. Both of
+  /// these relationships should have the same information, just in opposite
+  /// directions.
+  ///
+  /// \param [in, out] SourceCand - The IRSimilarityCandidate to create a
+  /// canonical numbering from.
+  /// \param ToSourceMapping - The mapping of value numbers from this candidate
+  /// to \p SourceCand.
+  /// \param FromSourceMapping - The mapping of value numbers from \p SourceCand
+  /// to this candidate.
+  void createCanonicalRelationFrom(
+      IRSimilarityCandidate &SourceCand,
+      DenseMap> &ToSourceMapping,
+      DenseMap> &FromSourceMapping);
+
+  /// \param [in,out] BBSet - The set to track the basic blocks.
+  void getBasicBlocks(DenseSet &BBSet) const {
+    for (IRInstructionData &ID : *this) {
+      BasicBlock *BB = ID.Inst->getParent();
+      if (BBSet.contains(BB))
+        continue;
+      BBSet.insert(BB);
+    }
+  }
+
+  /// \param [in,out] BBSet - The set to track the basic blocks.
+  /// \param [in,out] BBList - A list in order of use to track the basic blocks.
+  void getBasicBlocks(DenseSet &BBSet,
+                      SmallVector &BBList) const {
+    for (IRInstructionData &ID : *this) {
+      BasicBlock *BB = ID.Inst->getParent();
+      if (BBSet.contains(BB))
+        continue;
+      BBSet.insert(BB);
+      BBList.push_back(BB);
+    }
+  }
+
   /// Compare the start and end indices of the two IRSimilarityCandidates for
   /// whether they overlap.
If the start instruction of one /// IRSimilarityCandidate is less than the end instruction of the other, and @@ -611,6 +813,32 @@ public: return VNIt->second; } + /// Find the canonical number from the global value number \p N stored in the + /// candidate. + /// + /// \param N - The global value number to find the canonical number for. + /// \returns An optional containing the value, and None if it could not be + /// found. + Optional getCanonicalNum(unsigned N) { + DenseMap::iterator NCIt = NumberToCanonNum.find(N); + if (NCIt == NumberToCanonNum.end()) + return None; + return NCIt->second; + } + + /// Find the global value number from the canonical number \p N stored in the + /// candidate. + /// + /// \param N - The canonical number to find the global vlaue number for. + /// \returns An optional containing the value, and None if it could not be + /// found. + Optional fromCanonicalNum(unsigned N) { + DenseMap::iterator CNIt = CanonNumToNumber.find(N); + if (CNIt == CanonNumToNumber.end()) + return None; + return CNIt->second; + } + /// \param RHS -The IRSimilarityCandidate to compare against /// \returns true if the IRSimilarityCandidate is occurs after the /// IRSimilarityCandidate in the program. @@ -623,6 +851,9 @@ public: iterator end() const { return std::next(iterator(back())); } }; +typedef DenseMap>> + CandidateGVNMapping; typedef std::vector SimilarityGroup; typedef std::vector SimilarityGroupList; @@ -651,8 +882,9 @@ typedef std::vector SimilarityGroupList; /// analyzing the module. class IRSimilarityIdentifier { public: - IRSimilarityIdentifier() - : Mapper(&InstDataAllocator, &InstDataListAllocator) {} + IRSimilarityIdentifier(bool MatchBranches = true) + : Mapper(&InstDataAllocator, &InstDataListAllocator), + EnableBranches(MatchBranches) {} private: /// Map the instructions in the module to unsigned integers, using mapping @@ -728,6 +960,10 @@ private: /// instance of IRInstructionData. IRInstructionMapper Mapper; + /// The flag variable that marks whether we should check branches for + /// similarity, or only look within basic blocks. + bool EnableBranches = true; + /// The SimilarityGroups found with the most recent run of \ref /// findSimilarity. None if there is no recent run. Optional SimilarityCandidates; diff --git a/llvm/include/llvm/Analysis/IVDescriptors.h b/llvm/include/llvm/Analysis/IVDescriptors.h index 82e1b14960bd..c26dbc457949 100644 --- a/llvm/include/llvm/Analysis/IVDescriptors.h +++ b/llvm/include/llvm/Analysis/IVDescriptors.h @@ -36,20 +36,24 @@ class DominatorTree; /// These are the kinds of recurrences that we support. enum class RecurKind { - None, ///< Not a recurrence. - Add, ///< Sum of integers. - Mul, ///< Product of integers. - Or, ///< Bitwise or logical OR of integers. - And, ///< Bitwise or logical AND of integers. - Xor, ///< Bitwise or logical XOR of integers. - SMin, ///< Signed integer min implemented in terms of select(cmp()). - SMax, ///< Signed integer max implemented in terms of select(cmp()). - UMin, ///< Unisgned integer min implemented in terms of select(cmp()). - UMax, ///< Unsigned integer max implemented in terms of select(cmp()). - FAdd, ///< Sum of floats. - FMul, ///< Product of floats. - FMin, ///< FP min implemented in terms of select(cmp()). - FMax ///< FP max implemented in terms of select(cmp()). + None, ///< Not a recurrence. + Add, ///< Sum of integers. + Mul, ///< Product of integers. + Or, ///< Bitwise or logical OR of integers. + And, ///< Bitwise or logical AND of integers. 
+  Xor,        ///< Bitwise or logical XOR of integers.
+  SMin,       ///< Signed integer min implemented in terms of select(cmp()).
+  SMax,       ///< Signed integer max implemented in terms of select(cmp()).
+  UMin,       ///< Unsigned integer min implemented in terms of select(cmp()).
+  UMax,       ///< Unsigned integer max implemented in terms of select(cmp()).
+  FAdd,       ///< Sum of floats.
+  FMul,       ///< Product of floats.
+  FMin,       ///< FP min implemented in terms of select(cmp()).
+  FMax,       ///< FP max implemented in terms of select(cmp()).
+  SelectICmp, ///< Integer select(icmp(),x,y) where one of (x,y) is loop
+              ///< invariant
+  SelectFCmp  ///< Integer select(fcmp(),x,y) where one of (x,y) is loop
+              ///< invariant
 };

 /// The RecurrenceDescriptor is used to identify recurrences variables in a
@@ -112,12 +116,14 @@ public:
   };

   /// Returns a struct describing if the instruction 'I' can be a recurrence
-  /// variable of type 'Kind'. If the recurrence is a min/max pattern of
-  /// select(icmp()) this function advances the instruction pointer 'I' from the
-  /// compare instruction to the select instruction and stores this pointer in
-  /// 'PatternLastInst' member of the returned struct.
-  static InstDesc isRecurrenceInstr(Instruction *I, RecurKind Kind,
-                                    InstDesc &Prev, FastMathFlags FMF);
+  /// variable of type 'Kind' for a Loop \p L and reduction PHI \p Phi.
+  /// If the recurrence is a min/max pattern of select(icmp()) this function
+  /// advances the instruction pointer 'I' from the compare instruction to the
+  /// select instruction and stores this pointer in 'PatternLastInst' member of
+  /// the returned struct.
+  static InstDesc isRecurrenceInstr(Loop *L, PHINode *Phi, Instruction *I,
+                                    RecurKind Kind, InstDesc &Prev,
+                                    FastMathFlags FuncFMF);

   /// Returns true if instruction I has multiple uses in Insts
   static bool hasMultipleUsesOf(Instruction *I,
@@ -127,20 +133,29 @@ public:
   /// Returns true if all uses of the instruction I is within the Set.
   static bool areAllUsesIn(Instruction *I, SmallPtrSetImpl &Set);

-  /// Returns a struct describing if the instruction is a
-  /// Select(ICmp(X, Y), X, Y) instruction pattern corresponding to a min(X, Y)
-  /// or max(X, Y). \p Prev specifies the description of an already processed
-  /// select instruction, so its corresponding cmp can be matched to it.
-  static InstDesc isMinMaxSelectCmpPattern(Instruction *I,
-                                           const InstDesc &Prev);
+  /// Returns a struct describing if the instruction is a llvm.(s/u)(min/max),
+  /// llvm.minnum/maxnum or a Select(ICmp(X, Y), X, Y) pair of instructions
+  /// corresponding to a min(X, Y) or max(X, Y), matching the recurrence kind \p
+  /// Kind. \p Prev specifies the description of an already processed select
+  /// instruction, so its corresponding cmp can be matched to it.
+  static InstDesc isMinMaxPattern(Instruction *I, RecurKind Kind,
+                                  const InstDesc &Prev);
+
+  /// Returns a struct describing whether the instruction is either a
+  ///   Select(ICmp(A, B), X, Y), or
+  ///   Select(FCmp(A, B), X, Y)
+  /// where one of (X, Y) is a loop invariant integer and the other is a PHI
+  /// value. \p Prev specifies the description of an already processed select
+  /// instruction, so its corresponding cmp can be matched to it.
+  static InstDesc isSelectCmpPattern(Loop *Loop, PHINode *OrigPhi,
+                                     Instruction *I, InstDesc &Prev);

   /// Returns a struct describing if the instruction is a
   /// Select(FCmp(X, Y), (Z = X op PHINode), PHINode) instruction pattern.
static InstDesc isConditionalRdxPattern(RecurKind Kind, Instruction *I); /// Returns identity corresponding to the RecurrenceKind. - static Constant *getRecurrenceIdentity(RecurKind K, Type *Tp, - FastMathFlags FMF); + Value *getRecurrenceIdentity(RecurKind K, Type *Tp, FastMathFlags FMF); /// Returns the opcode corresponding to the RecurrenceKind. static unsigned getOpcode(RecurKind Kind); @@ -150,7 +165,7 @@ public: /// non-null, the minimal bit width needed to compute the reduction will be /// computed. static bool AddReductionVar(PHINode *Phi, RecurKind Kind, Loop *TheLoop, - FastMathFlags FMF, + FastMathFlags FuncFMF, RecurrenceDescriptor &RedDes, DemandedBits *DB = nullptr, AssumptionCache *AC = nullptr, @@ -220,6 +235,12 @@ public: return isIntMinMaxRecurrenceKind(Kind) || isFPMinMaxRecurrenceKind(Kind); } + /// Returns true if the recurrence kind is of the form + /// select(cmp(),x,y) where one of (x,y) is loop invariant. + static bool isSelectCmpRecurrenceKind(RecurKind Kind) { + return Kind == RecurKind::SelectICmp || Kind == RecurKind::SelectFCmp; + } + /// Returns the type of the recurrence. This type can be narrower than the /// actual type of the Phi if the recurrence has been type-promoted. Type *getRecurrenceType() const { return RecurrenceType; } @@ -329,6 +350,11 @@ public: : Instruction::BinaryOpsEnd; } + Type *getElementType() const { + assert(IK == IK_PtrInduction && "Only pointer induction has element type"); + return ElementType; + } + /// Returns a reference to the type cast instructions in the induction /// update chain, that are redundant when guarded with a runtime /// SCEV overflow check. @@ -340,6 +366,7 @@ private: /// Private constructor - used by \c isInductionPHI. InductionDescriptor(Value *Start, InductionKind K, const SCEV *Step, BinaryOperator *InductionBinOp = nullptr, + Type *ElementType = nullptr, SmallVectorImpl *Casts = nullptr); /// Start value. @@ -350,6 +377,9 @@ private: const SCEV *Step = nullptr; // Instruction that advances induction variable. BinaryOperator *InductionBinOp = nullptr; + // Element type for pointer induction variables. + // TODO: This can be dropped once support for typed pointers is removed. + Type *ElementType = nullptr; // Instructions used for type-casts of the induction variable, // that are redundant when guarded with a runtime SCEV overflow check. SmallVector RedundantCasts; diff --git a/llvm/include/llvm/Analysis/IVUsers.h b/llvm/include/llvm/Analysis/IVUsers.h index f8ea3bcca229..e2026a4d5875 100644 --- a/llvm/include/llvm/Analysis/IVUsers.h +++ b/llvm/include/llvm/Analysis/IVUsers.h @@ -157,9 +157,6 @@ public: /// dump - This method is used for debugging. void dump() const; - -protected: - bool AddUsersImpl(Instruction *I, SmallPtrSetImpl &SimpleLoopNests); }; Pass *createIVUsersPass(); diff --git a/llvm/include/llvm/Analysis/InlineAdvisor.h b/llvm/include/llvm/Analysis/InlineAdvisor.h index c27aaf0db8f2..9f9bc3a5e71b 100644 --- a/llvm/include/llvm/Analysis/InlineAdvisor.h +++ b/llvm/include/llvm/Analysis/InlineAdvisor.h @@ -22,6 +22,7 @@ class CallBase; class Function; class Module; class OptimizationRemarkEmitter; +struct ReplayInlinerSettings; /// There are 3 scenarios we can use the InlineAdvisor: /// - Default - use manual heuristics. @@ -143,7 +144,11 @@ public: /// be up-to-date wrt previous inlining decisions. \p MandatoryOnly indicates /// only mandatory (always-inline) call sites should be recommended - this /// allows the InlineAdvisor track such inlininings. 
- /// Returns an InlineAdvice with the inlining recommendation. + /// Returns: + /// - An InlineAdvice with the inlining recommendation. + /// - Null when no recommendation is made (https://reviews.llvm.org/D110658). + /// TODO: Consider removing the Null return scenario by incorporating the + /// SampleProfile inliner into an InlineAdvisor std::unique_ptr getAdvice(CallBase &CB, bool MandatoryOnly = false); @@ -157,6 +162,12 @@ public: /// to prepare for a partial update. virtual void onPassExit() {} + /// Called when the module is invalidated. We let the advisor implementation + /// decide what to refresh - in the case of the development mode + /// implementation, for example, we wouldn't want to delete the whole object + /// and need to re-load the model evaluator. + virtual void onModuleInvalidated() {} + protected: InlineAdvisor(Module &M, FunctionAnalysisManager &FAM); virtual std::unique_ptr getAdviceImpl(CallBase &CB) = 0; @@ -219,15 +230,18 @@ public: InlineAdvisorAnalysis() = default; struct Result { Result(Module &M, ModuleAnalysisManager &MAM) : M(M), MAM(MAM) {} - bool invalidate(Module &, const PreservedAnalyses &, + bool invalidate(Module &, const PreservedAnalyses &PA, ModuleAnalysisManager::Invalidator &) { - // InlineAdvisor must be preserved across analysis invalidations. - return false; + if (Advisor && !PA.areAllPreserved()) + Advisor->onModuleInvalidated(); + // Check whether the analysis has been explicitly invalidated. Otherwise, + // it's stateless and remains preserved. + auto PAC = PA.getChecker(); + return !PAC.preservedWhenStateless(); } bool tryCreate(InlineParams Params, InliningAdvisorMode Mode, - StringRef ReplayFile); + const ReplayInlinerSettings &ReplaySettings); InlineAdvisor *getAdvisor() const { return Advisor.get(); } - void clear() { Advisor.reset(); } private: Module &M; @@ -263,12 +277,16 @@ shouldInline(CallBase &CB, function_ref GetInlineCost, /// Emit ORE message. void emitInlinedInto(OptimizationRemarkEmitter &ORE, DebugLoc DLoc, const BasicBlock *Block, const Function &Callee, - const Function &Caller, const InlineCost &IC, - bool ForProfileContext = false, + const Function &Caller, bool IsMandatory, + function_ref ExtraContext = {}, const char *PassName = nullptr); -/// get call site location as string -std::string getCallSiteLocation(DebugLoc DLoc); +/// Emit ORE message based in cost (default heuristic). +void emitInlinedIntoBasedOnCost(OptimizationRemarkEmitter &ORE, DebugLoc DLoc, + const BasicBlock *Block, const Function &Callee, + const Function &Caller, const InlineCost &IC, + bool ForProfileContext = false, + const char *PassName = nullptr); /// Add location info to ORE message. void addLocationToRemarks(OptimizationRemark &Remark, DebugLoc DLoc); diff --git a/llvm/include/llvm/Analysis/InlineCost.h b/llvm/include/llvm/Analysis/InlineCost.h index 4e1b28d4633f..b22841343b1a 100644 --- a/llvm/include/llvm/Analysis/InlineCost.h +++ b/llvm/include/llvm/Analysis/InlineCost.h @@ -213,6 +213,9 @@ struct InlineParams { /// Indicate whether we should allow inline deferral. Optional EnableDeferral = true; + + /// Indicate whether we allow inlining for recursive call. 
+ Optional AllowRecursiveCall = false; }; /// Generate the parameters to tune the inline cost analysis based only on the diff --git a/llvm/include/llvm/Analysis/InlineOrder.h b/llvm/include/llvm/Analysis/InlineOrder.h new file mode 100644 index 000000000000..def3192356f4 --- /dev/null +++ b/llvm/include/llvm/Analysis/InlineOrder.h @@ -0,0 +1,172 @@ +//===- InlineOrder.h - Inlining order abstraction -*- C++ ---*-------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +#ifndef LLVM_ANALYSIS_INLINEORDER_H +#define LLVM_ANALYSIS_INLINEORDER_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include +#include + +namespace llvm { +class CallBase; +class Function; +class Module; + +template class InlineOrder { +public: + using reference = T &; + using const_reference = const T &; + + virtual ~InlineOrder() {} + + virtual size_t size() = 0; + + virtual void push(const T &Elt) = 0; + + virtual T pop() = 0; + + virtual const_reference front() = 0; + + virtual void erase_if(function_ref Pred) = 0; + + bool empty() { return !size(); } +}; + +template > +class DefaultInlineOrder : public InlineOrder { + using reference = T &; + using const_reference = const T &; + +public: + size_t size() override { return Calls.size() - FirstIndex; } + + void push(const T &Elt) override { Calls.push_back(Elt); } + + T pop() override { + assert(size() > 0); + return Calls[FirstIndex++]; + } + + const_reference front() override { + assert(size() > 0); + return Calls[FirstIndex]; + } + + void erase_if(function_ref Pred) override { + Calls.erase(std::remove_if(Calls.begin() + FirstIndex, Calls.end(), Pred), + Calls.end()); + } + +private: + Container Calls; + size_t FirstIndex = 0; +}; + +class InlineSizePriority { +public: + InlineSizePriority(int Size) : Size(Size) {} + + static bool isMoreDesirable(const InlineSizePriority &S1, + const InlineSizePriority &S2) { + return S1.Size < S2.Size; + } + + static InlineSizePriority evaluate(CallBase *CB) { + Function *Callee = CB->getCalledFunction(); + return InlineSizePriority(Callee->getInstructionCount()); + } + + int Size; +}; + +template +class PriorityInlineOrder : public InlineOrder> { + using T = std::pair; + using HeapT = std::pair; + using reference = T &; + using const_reference = const T &; + + static bool cmp(const HeapT &P1, const HeapT &P2) { + return PriorityT::isMoreDesirable(P2.second, P1.second); + } + + // A call site could become less desirable for inlining because of the size + // growth from prior inlining into the callee. This method is used to lazily + // update the desirability of a call site if it's decreasing. It is only + // called on pop() or front(), not every time the desirability changes. When + // the desirability of the front call site decreases, an updated one would be + // pushed right back into the heap. For simplicity, those cases where + // the desirability of a call site increases are ignored here. 
+ void adjust() { + bool Changed = false; + do { + CallBase *CB = Heap.front().first; + const PriorityT PreviousGoodness = Heap.front().second; + const PriorityT CurrentGoodness = PriorityT::evaluate(CB); + Changed = PriorityT::isMoreDesirable(PreviousGoodness, CurrentGoodness); + if (Changed) { + std::pop_heap(Heap.begin(), Heap.end(), cmp); + Heap.pop_back(); + Heap.push_back({CB, CurrentGoodness}); + std::push_heap(Heap.begin(), Heap.end(), cmp); + } + } while (Changed); + } + +public: + size_t size() override { return Heap.size(); } + + void push(const T &Elt) override { + CallBase *CB = Elt.first; + const int InlineHistoryID = Elt.second; + const PriorityT Goodness = PriorityT::evaluate(CB); + + Heap.push_back({CB, Goodness}); + std::push_heap(Heap.begin(), Heap.end(), cmp); + InlineHistoryMap[CB] = InlineHistoryID; + } + + T pop() override { + assert(size() > 0); + adjust(); + + CallBase *CB = Heap.front().first; + T Result = std::make_pair(CB, InlineHistoryMap[CB]); + InlineHistoryMap.erase(CB); + std::pop_heap(Heap.begin(), Heap.end(), cmp); + Heap.pop_back(); + return Result; + } + + const_reference front() override { + assert(size() > 0); + adjust(); + + CallBase *CB = Heap.front().first; + return *InlineHistoryMap.find(CB); + } + + void erase_if(function_ref Pred) override { + auto PredWrapper = [=](HeapT P) -> bool { + return Pred(std::make_pair(P.first, 0)); + }; + llvm::erase_if(Heap, PredWrapper); + std::make_heap(Heap.begin(), Heap.end(), cmp); + } + +private: + SmallVector Heap; + DenseMap InlineHistoryMap; +}; +} // namespace llvm +#endif // LLVM_ANALYSIS_INLINEORDER_H diff --git a/llvm/include/llvm/Analysis/InstructionSimplify.h b/llvm/include/llvm/Analysis/InstructionSimplify.h index efaf1847276b..f0f8e4bc9175 100644 --- a/llvm/include/llvm/Analysis/InstructionSimplify.h +++ b/llvm/include/llvm/Analysis/InstructionSimplify.h @@ -248,7 +248,7 @@ Value *SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal, const SimplifyQuery &Q); /// Given operands for a GetElementPtrInst, fold the result or return null. -Value *SimplifyGEPInst(Type *SrcTy, ArrayRef Ops, +Value *SimplifyGEPInst(Type *SrcTy, ArrayRef Ops, bool InBounds, const SimplifyQuery &Q); /// Given operands for an InsertValueInst, fold the result or return null. diff --git a/llvm/include/llvm/Analysis/LazyCallGraph.h b/llvm/include/llvm/Analysis/LazyCallGraph.h index ca276d2f3cf8..0580f4d7b226 100644 --- a/llvm/include/llvm/Analysis/LazyCallGraph.h +++ b/llvm/include/llvm/Analysis/LazyCallGraph.h @@ -145,7 +145,7 @@ public: /// around but clear them. explicit operator bool() const; - /// Returnss the \c Kind of the edge. + /// Returns the \c Kind of the edge. Kind getKind() const; /// Test whether the edge represents a direct call to a function. @@ -307,9 +307,9 @@ public: /// A node in the call graph. /// - /// This represents a single node. It's primary roles are to cache the list of - /// callees, de-duplicate and provide fast testing of whether a function is - /// a callee, and facilitate iteration of child nodes in the graph. + /// This represents a single node. Its primary roles are to cache the list of + /// callees, de-duplicate and provide fast testing of whether a function is a + /// callee, and facilitate iteration of child nodes in the graph. /// /// The node works much like an optional in order to lazily populate the /// edges of each node. Until populated, there are no edges. Once populated, @@ -392,7 +392,7 @@ public: /// Internal helper to directly replace the function with a new one. 
/// - /// This is used to facilitate tranfsormations which need to replace the + /// This is used to facilitate transformations which need to replace the /// formal Function object but directly move the body and users from one to /// the other. void replaceFunction(Function &NewF); @@ -419,7 +419,7 @@ public: /// outer structure. SCCs do not support mutation of the call graph, that /// must be done through the containing \c RefSCC in order to fully reason /// about the ordering and connections of the graph. - class SCC { + class LLVM_EXTERNAL_VISIBILITY SCC { friend class LazyCallGraph; friend class LazyCallGraph::Node; @@ -435,7 +435,7 @@ public: Nodes.clear(); } - /// Print a short descrtiption useful for debugging or logging. + /// Print a short description useful for debugging or logging. /// /// We print the function names in the SCC wrapped in '()'s and skipping /// the middle functions if there are a large number. @@ -467,9 +467,10 @@ public: /// Verify invariants about the SCC. /// /// This will attempt to validate all of the basic invariants within an - /// SCC, but not that it is a strongly connected componet per-se. Primarily - /// useful while building and updating the graph to check that basic - /// properties are in place rather than having inexplicable crashes later. + /// SCC, but not that it is a strongly connected component per se. + /// Primarily useful while building and updating the graph to check that + /// basic properties are in place rather than having inexplicable crashes + /// later. void verify(); #endif @@ -511,7 +512,7 @@ public: /// Provide a short name by printing this SCC to a std::string. /// - /// This copes with the fact that we don't have a name per-se for an SCC + /// This copes with the fact that we don't have a name per se for an SCC /// while still making the use of this in debugging and logging useful. std::string getName() const { std::string Name; @@ -644,7 +645,7 @@ public: /// Provide a short name by printing this RefSCC to a std::string. /// - /// This copes with the fact that we don't have a name per-se for an RefSCC + /// This copes with the fact that we don't have a name per se for an RefSCC /// while still making the use of this in debugging and logging useful. std::string getName() const { std::string Name; @@ -1085,47 +1086,9 @@ public: /// updates that set with every constant visited. /// /// For each defined function, calls \p Callback with that function. - template static void visitReferences(SmallVectorImpl &Worklist, SmallPtrSetImpl &Visited, - CallbackT Callback) { - while (!Worklist.empty()) { - Constant *C = Worklist.pop_back_val(); - - if (Function *F = dyn_cast(C)) { - if (!F->isDeclaration()) - Callback(*F); - continue; - } - - // The blockaddress constant expression is a weird special case, we can't - // generically walk its operands the way we do for all other constants. - if (BlockAddress *BA = dyn_cast(C)) { - // If we've already visited the function referred to by the block - // address, we don't need to revisit it. - if (Visited.count(BA->getFunction())) - continue; - - // If all of the blockaddress' users are instructions within the - // referred to function, we don't need to insert a cycle. - if (llvm::all_of(BA->users(), [&](User *U) { - if (Instruction *I = dyn_cast(U)) - return I->getFunction() == BA->getFunction(); - return false; - })) - continue; - - // Otherwise we should go visit the referred to function. 
- Visited.insert(BA->getFunction()); - Worklist.push_back(BA->getFunction()); - continue; - } - - for (Value *Op : C->operand_values()) - if (Visited.insert(cast(Op)).second) - Worklist.push_back(cast(Op)); - } - } + function_ref Callback); ///@} diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h index 0a0ef1536caf..2b4edfac61fc 100644 --- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h +++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h @@ -177,21 +177,11 @@ public: /// Register the location (instructions are given increasing numbers) /// of a write access. - void addAccess(StoreInst *SI) { - Value *Ptr = SI->getPointerOperand(); - Accesses[MemAccessInfo(Ptr, true)].push_back(AccessIdx); - InstMap.push_back(SI); - ++AccessIdx; - } + void addAccess(StoreInst *SI); /// Register the location (instructions are given increasing numbers) /// of a write access. - void addAccess(LoadInst *LI) { - Value *Ptr = LI->getPointerOperand(); - Accesses[MemAccessInfo(Ptr, false)].push_back(AccessIdx); - InstMap.push_back(LI); - ++AccessIdx; - } + void addAccess(LoadInst *LI); /// Check whether the dependencies between the accesses are safe. /// @@ -664,15 +654,14 @@ Value *stripIntegerCast(Value *V); /// If necessary this method will version the stride of the pointer according /// to \p PtrToStride and therefore add further predicates to \p PSE. /// -/// If \p OrigPtr is not null, use it to look up the stride value instead of \p -/// Ptr. \p PtrToStride provides the mapping between the pointer value and its +/// \p PtrToStride provides the mapping between the pointer value and its /// stride as collected by LoopVectorizationLegality::collectStridedAccess. const SCEV *replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE, const ValueToValueMap &PtrToStride, - Value *Ptr, Value *OrigPtr = nullptr); + Value *Ptr); -/// If the pointer has a constant stride return it in units of its -/// element size. Otherwise return zero. +/// If the pointer has a constant stride return it in units of the access type +/// size. Otherwise return zero. /// /// Ensure that it does not wrap in the address space, assuming the predicate /// associated with \p PSE is true. @@ -681,7 +670,8 @@ const SCEV *replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE, /// to \p PtrToStride and therefore add further predicates to \p PSE. /// The \p Assume parameter indicates if we are allowed to make additional /// run-time assumptions. -int64_t getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp, +int64_t getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, + const Loop *Lp, const ValueToValueMap &StridesMap = ValueToValueMap(), bool Assume = false, bool ShouldCheckWrap = true); diff --git a/llvm/include/llvm/Analysis/LoopAnalysisManager.h b/llvm/include/llvm/Analysis/LoopAnalysisManager.h index 92db1d67fc4e..bc8a1e74e447 100644 --- a/llvm/include/llvm/Analysis/LoopAnalysisManager.h +++ b/llvm/include/llvm/Analysis/LoopAnalysisManager.h @@ -58,6 +58,7 @@ struct LoopStandardAnalysisResults { TargetLibraryInfo &TLI; TargetTransformInfo &TTI; BlockFrequencyInfo *BFI; + BranchProbabilityInfo *BPI; MemorySSA *MSSA; }; diff --git a/llvm/include/llvm/Analysis/LoopInfo.h b/llvm/include/llvm/Analysis/LoopInfo.h index 164ec50e47bc..15c9d911ab80 100644 --- a/llvm/include/llvm/Analysis/LoopInfo.h +++ b/llvm/include/llvm/Analysis/LoopInfo.h @@ -527,7 +527,7 @@ extern template class LoopBase; /// Represents a single loop in the control flow graph. 
Note that not all SCCs /// in the CFG are necessarily loops. -class Loop : public LoopBase { +class LLVM_EXTERNAL_VISIBILITY Loop : public LoopBase { public: /// A range representing the start and end location of a loop. class LocRange { @@ -950,7 +950,7 @@ public: /// /// Note that because loops form a forest of trees, preorder is equivalent to /// reverse postorder. - SmallVector getLoopsInPreorder(); + SmallVector getLoopsInPreorder() const; /// Return all of the loops in the function in preorder across the loop /// nests, with siblings in *reverse* program order. @@ -960,7 +960,7 @@ public: /// /// Also note that this is *not* a reverse preorder. Only the siblings are in /// reverse program order. - SmallVector getLoopsInReverseSiblingPreorder(); + SmallVector getLoopsInReverseSiblingPreorder() const; /// Return the inner most loop that BB lives in. If a basic block is in no /// loop (for example the entry node), null is returned. @@ -1213,6 +1213,13 @@ public: }; +/// Enable verification of loop info. +/// +/// The flag enables checks which are expensive and are disabled by default +/// unless the `EXPENSIVE_CHECKS` macro is defined. The `-verify-loop-info` +/// flag allows the checks to be enabled selectively without re-compilation. +extern bool VerifyLoopInfo; + // Allow clients to walk the list of nested loops... template <> struct GraphTraits { typedef const Loop *NodeRef; @@ -1305,6 +1312,10 @@ bool getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name); llvm::Optional getOptionalIntLoopAttribute(const Loop *TheLoop, StringRef Name); +/// Find named metadata for a loop with an integer value. Return \p Default if +/// not set. +int getIntLoopAttribute(const Loop *TheLoop, StringRef Name, int Default = 0); + /// Find string metadata for loop /// /// If it has a value (e.g. {"llvm.distribute", 1} return the value as an diff --git a/llvm/include/llvm/Analysis/LoopInfoImpl.h b/llvm/include/llvm/Analysis/LoopInfoImpl.h index 2cc9afb7c2cd..b8b8330d0fe1 100644 --- a/llvm/include/llvm/Analysis/LoopInfoImpl.h +++ b/llvm/include/llvm/Analysis/LoopInfoImpl.h @@ -574,7 +574,8 @@ void LoopInfoBase::analyze(const DomTreeBase &DomTree) { } template -SmallVector LoopInfoBase::getLoopsInPreorder() { +SmallVector +LoopInfoBase::getLoopsInPreorder() const { SmallVector PreOrderLoops, PreOrderWorklist; // The outer-most loop actually goes into the result in the same relative // order as we walk it. But LoopInfo stores the top level loops in reverse @@ -592,7 +593,7 @@ SmallVector LoopInfoBase::getLoopsInPreorder() { template SmallVector -LoopInfoBase::getLoopsInReverseSiblingPreorder() { +LoopInfoBase::getLoopsInReverseSiblingPreorder() const { SmallVector PreOrderLoops, PreOrderWorklist; // The outer-most loop actually goes into the result in the same relative // order as we walk it. LoopInfo stores the top level loops in reverse diff --git a/llvm/include/llvm/Analysis/LoopNestAnalysis.h b/llvm/include/llvm/Analysis/LoopNestAnalysis.h index 9a749a1c8eae..3d4a064cf7e3 100644 --- a/llvm/include/llvm/Analysis/LoopNestAnalysis.h +++ b/llvm/include/llvm/Analysis/LoopNestAnalysis.h @@ -21,11 +21,14 @@ namespace llvm { using LoopVectorTy = SmallVector; + class LPMUpdater; /// This class represents a loop nest and can be used to query its properties. -class LoopNest { +class LLVM_EXTERNAL_VISIBILITY LoopNest { public: + using InstrVectorTy = SmallVector; + /// Construct a loop nest rooted by loop \p Root. 
LoopNest(Loop &Root, ScalarEvolution &SE); @@ -48,6 +51,12 @@ public: static bool arePerfectlyNested(const Loop &OuterLoop, const Loop &InnerLoop, ScalarEvolution &SE); + /// Return a vector of instructions that prevent the LoopNest given + /// by loops \p OuterLoop and \p InnerLoop from being perfect. + static InstrVectorTy getInterveningInstructions(const Loop &OuterLoop, + const Loop &InnerLoop, + ScalarEvolution &SE); + /// Return the maximum nesting depth of the loop nest rooted by loop \p Root. /// For example given the loop nest: /// \code @@ -150,6 +159,17 @@ public: protected: const unsigned MaxPerfectDepth; // maximum perfect nesting depth level. LoopVectorTy Loops; // the loops in the nest (in breadth first order). + +private: + enum LoopNestEnum { + PerfectLoopNest, + ImperfectLoopNest, + InvalidLoopStructure, + OuterLoopLowerBoundUnknown + }; + static LoopNestEnum analyzeLoopNestForPerfectNest(const Loop &OuterLoop, + const Loop &InnerLoop, + ScalarEvolution &SE); }; raw_ostream &operator<<(raw_ostream &, const LoopNest &); diff --git a/llvm/include/llvm/Analysis/MLInlineAdvisor.h b/llvm/include/llvm/Analysis/MLInlineAdvisor.h index 54edbb823263..a218561e61c7 100644 --- a/llvm/include/llvm/Analysis/MLInlineAdvisor.h +++ b/llvm/include/llvm/Analysis/MLInlineAdvisor.h @@ -38,6 +38,7 @@ public: bool isForcedToStop() const { return ForceStop; } int64_t getLocalCalls(Function &F); const MLModelRunner &getModelRunner() const { return *ModelRunner.get(); } + void onModuleInvalidated() override { Invalid = true; } protected: std::unique_ptr getAdviceImpl(CallBase &CB) override; @@ -55,6 +56,7 @@ protected: private: int64_t getModuleIRSize() const; + bool Invalid = true; std::unique_ptr CG; int64_t NodeCount = 0; diff --git a/llvm/include/llvm/Analysis/MemorySSA.h b/llvm/include/llvm/Analysis/MemorySSA.h index f40b99968fd3..48aeef371e3d 100644 --- a/llvm/include/llvm/Analysis/MemorySSA.h +++ b/llvm/include/llvm/Analysis/MemorySSA.h @@ -106,9 +106,6 @@ namespace llvm { -/// Enables memory ssa as a dependency for loop passes. -extern cl::opt EnableMSSALoopDependency; - class AllocaInst; class Function; class Instruction; @@ -786,21 +783,22 @@ public: /// dominates Use \p B. bool dominates(const MemoryAccess *A, const Use &B) const; + enum class VerificationLevel { Fast, Full }; /// Verify that MemorySSA is self consistent (IE definitions dominate /// all uses, uses appear in the right places). This is used by unit tests. - void verifyMemorySSA() const; + void verifyMemorySSA(VerificationLevel = VerificationLevel::Fast) const; /// Used in various insertion functions to specify whether we are talking /// about the beginning or end of a block. enum InsertionPlace { Beginning, End, BeforeTerminator }; protected: - // Used by Memory SSA annotater, dumpers, and wrapper pass - friend class MemorySSAAnnotatedWriter; + // Used by Memory SSA dumpers and wrapper pass friend class MemorySSAPrinterLegacyPass; friend class MemorySSAUpdater; - void verifyOrderingDominationAndDefUses(Function &F) const; + void verifyOrderingDominationAndDefUses( + Function &F, VerificationLevel = VerificationLevel::Fast) const; void verifyDominationNumbers(const Function &F) const; void verifyPrevDefInPhis(Function &F) const; @@ -898,6 +896,13 @@ private: unsigned NextID; }; +/// Enables verification of MemorySSA. +/// +/// The checks which this flag enables is exensive and disabled by default +/// unless `EXPENSIVE_CHECKS` is defined. 
The flag `-verify-memoryssa` can be +/// used to selectively enable the verification without re-compilation. +extern bool VerifyMemorySSA; + // Internal MemorySSA utils, for use by MemorySSA classes and walkers class MemorySSAUtil { protected: @@ -956,6 +961,17 @@ public: PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); }; +/// Printer pass for \c MemorySSA via the walker. +class MemorySSAWalkerPrinterPass + : public PassInfoMixin { + raw_ostream &OS; + +public: + explicit MemorySSAWalkerPrinterPass(raw_ostream &OS) : OS(OS) {} + + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; + /// Verifier pass for \c MemorySSA. struct MemorySSAVerifierPass : PassInfoMixin { PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); diff --git a/llvm/include/llvm/Analysis/ObjCARCAnalysisUtils.h b/llvm/include/llvm/Analysis/ObjCARCAnalysisUtils.h index 62bdade95d96..17062ab907a6 100644 --- a/llvm/include/llvm/Analysis/ObjCARCAnalysisUtils.h +++ b/llvm/include/llvm/Analysis/ObjCARCAnalysisUtils.h @@ -78,14 +78,17 @@ inline const Value *GetUnderlyingObjCPtr(const Value *V) { } /// A wrapper for GetUnderlyingObjCPtr used for results memoization. -inline const Value * -GetUnderlyingObjCPtrCached(const Value *V, - DenseMap &Cache) { - if (auto InCache = Cache.lookup(V)) - return InCache; +inline const Value *GetUnderlyingObjCPtrCached( + const Value *V, + DenseMap> &Cache) { + // The entry is invalid if either value handle is null. + auto InCache = Cache.lookup(V); + if (InCache.first && InCache.second) + return InCache.second; const Value *Computed = GetUnderlyingObjCPtr(V); - Cache[V] = const_cast(Computed); + Cache[V] = + std::make_pair(const_cast(V), const_cast(Computed)); return Computed; } @@ -168,8 +171,8 @@ bool IsPotentialRetainableObjPtr(const Value *Op, AAResults &AA); /// Helper for GetARCInstKind. Determines what kind of construct CS /// is. inline ARCInstKind GetCallSiteClass(const CallBase &CB) { - for (auto I = CB.arg_begin(), E = CB.arg_end(); I != E; ++I) - if (IsPotentialRetainableObjPtr(*I)) + for (const Use &U : CB.args()) + if (IsPotentialRetainableObjPtr(U)) return CB.onlyReadsMemory() ? ARCInstKind::User : ARCInstKind::CallOrUser; return CB.onlyReadsMemory() ? 
ARCInstKind::None : ARCInstKind::Call; @@ -204,11 +207,10 @@ inline bool IsObjCIdentifiedObject(const Value *V) { return true; StringRef Section = GV->getSection(); - if (Section.find("__message_refs") != StringRef::npos || - Section.find("__objc_classrefs") != StringRef::npos || - Section.find("__objc_superrefs") != StringRef::npos || - Section.find("__objc_methname") != StringRef::npos || - Section.find("__cstring") != StringRef::npos) + if (Section.contains("__message_refs") || + Section.contains("__objc_classrefs") || + Section.contains("__objc_superrefs") || + Section.contains("__objc_methname") || Section.contains("__cstring")) return true; } } diff --git a/llvm/include/llvm/Analysis/ObjCARCUtil.h b/llvm/include/llvm/Analysis/ObjCARCUtil.h index 2566bfbcf61c..362dd6c29992 100644 --- a/llvm/include/llvm/Analysis/ObjCARCUtil.h +++ b/llvm/include/llvm/Analysis/ObjCARCUtil.h @@ -11,9 +11,11 @@ /// //===----------------------------------------------------------------------===// -#ifndef LLVM_IR_OBJCARCUTIL_H -#define LLVM_IR_OBJCARCUTIL_H +#ifndef LLVM_ANALYSIS_OBJCARCUTIL_H +#define LLVM_ANALYSIS_OBJCARCUTIL_H +#include "llvm/Analysis/ObjCARCInstKind.h" +#include "llvm/IR/Function.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/LLVMContext.h" @@ -24,13 +26,6 @@ inline const char *getRVMarkerModuleFlagStr() { return "clang.arc.retainAutoreleasedReturnValueMarker"; } -enum AttachedCallOperandBundle : unsigned { RVOB_Retain, RVOB_Claim }; - -inline AttachedCallOperandBundle -getAttachedCallOperandBundleEnum(bool IsRetain) { - return IsRetain ? RVOB_Retain : RVOB_Claim; -} - inline bool hasAttachedCallOpBundle(const CallBase *CB) { // Ignore the bundle if the return type is void. Global optimization passes // can turn the called function's return type to void. That should happen only @@ -43,14 +38,32 @@ inline bool hasAttachedCallOpBundle(const CallBase *CB) { .hasValue(); } -inline bool hasAttachedCallOpBundle(const CallBase *CB, bool IsRetain) { - assert(hasAttachedCallOpBundle(CB) && - "call doesn't have operand bundle clang_arc_attachedcall"); +/// This function returns operand bundle clang_arc_attachedcall's argument, +/// which is the address of the ARC runtime function. +inline Optional getAttachedARCFunction(const CallBase *CB) { auto B = CB->getOperandBundle(LLVMContext::OB_clang_arc_attachedcall); - if (!B.hasValue()) - return false; - return cast(B->Inputs[0])->getZExtValue() == - getAttachedCallOperandBundleEnum(IsRetain); + if (!B.hasValue() || B->Inputs.size() == 0) + return None; + + return cast(B->Inputs[0]); +} + +/// Check whether the function is retainRV/claimRV. +inline bool isRetainOrClaimRV(ARCInstKind Kind) { + return Kind == ARCInstKind::RetainRV || Kind == ARCInstKind::ClaimRV; +} + +/// This function returns the ARCInstKind of the function attached to operand +/// bundle clang_arc_attachedcall. It returns None if the call doesn't have the +/// operand bundle or the operand is null. Otherwise it returns either RetainRV +/// or ClaimRV. 
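A minimal call-pattern sketch (the definition of getAttachedARCFunctionKind follows right below); the wrapper name is illustrative and not part of this header.

#include "llvm/Analysis/ObjCARCInstKind.h"
#include "llvm/Analysis/ObjCARCUtil.h"
#include "llvm/IR/InstrTypes.h"

// Sketch: true when the clang_arc_attachedcall bundle names retainRV, i.e.
// the +1 result stays alive for the caller; false for claimRV or when the
// bundle is absent or carries no function operand.
static bool callerKeepsAutoreleasedResult(const llvm::CallBase *CB) {
  using namespace llvm::objcarc;
  if (!hasAttachedCallOpBundle(CB))
    return false;
  // getAttachedARCFunctionKind returns None when the bundle has no operand.
  return getAttachedARCFunctionKind(CB) == ARCInstKind::RetainRV;
}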
+inline ARCInstKind getAttachedARCFunctionKind(const CallBase *CB) { + Optional Fn = getAttachedARCFunction(CB); + if (!Fn.hasValue()) + return ARCInstKind::None; + auto FnClass = GetFunctionClass(*Fn); + assert(isRetainOrClaimRV(FnClass) && "unexpected ARC runtime function"); + return FnClass; } } // end namespace objcarc diff --git a/llvm/include/llvm/Analysis/ProfileSummaryInfo.h b/llvm/include/llvm/Analysis/ProfileSummaryInfo.h index c95404d96f4e..886800d8a0f5 100644 --- a/llvm/include/llvm/Analysis/ProfileSummaryInfo.h +++ b/llvm/include/llvm/Analysis/ProfileSummaryInfo.h @@ -134,9 +134,13 @@ public: bool isColdCount(uint64_t C) const; /// Returns true if count \p C is considered hot with regard to a given /// hot percentile cutoff value. + /// PercentileCutoff is encoded as a 6 digit decimal fixed point number, where + /// the first two digits are the whole part. E.g. 995000 for 99.5 percentile. bool isHotCountNthPercentile(int PercentileCutoff, uint64_t C) const; /// Returns true if count \p C is considered cold with regard to a given /// cold percentile cutoff value. + /// PercentileCutoff is encoded as a 6 digit decimal fixed point number, where + /// the first two digits are the whole part. E.g. 995000 for 99.5 percentile. bool isColdCountNthPercentile(int PercentileCutoff, uint64_t C) const; /// Returns true if BasicBlock \p BB is considered hot. bool isHotBlock(const BasicBlock *BB, BlockFrequencyInfo *BFI) const; @@ -144,10 +148,14 @@ public: bool isColdBlock(const BasicBlock *BB, BlockFrequencyInfo *BFI) const; /// Returns true if BasicBlock \p BB is considered hot with regard to a given /// hot percentile cutoff value. + /// PercentileCutoff is encoded as a 6 digit decimal fixed point number, where + /// the first two digits are the whole part. E.g. 995000 for 99.5 percentile. bool isHotBlockNthPercentile(int PercentileCutoff, const BasicBlock *BB, BlockFrequencyInfo *BFI) const; /// Returns true if BasicBlock \p BB is considered cold with regard to a given /// cold percentile cutoff value. + /// PercentileCutoff is encoded as a 6 digit decimal fixed point number, where + /// the first two digits are the whole part. E.g. 995000 for 99.5 percentile. bool isColdBlockNthPercentile(int PercentileCutoff, const BasicBlock *BB, BlockFrequencyInfo *BFI) const; /// Returns true if the call site \p CB is considered hot. @@ -162,11 +170,11 @@ public: uint64_t getOrCompColdCountThreshold() const; /// Returns HotCountThreshold if set. uint64_t getHotCountThreshold() const { - return HotCountThreshold ? HotCountThreshold.getValue() : 0; + return HotCountThreshold.getValueOr(0); } /// Returns ColdCountThreshold if set. uint64_t getColdCountThreshold() const { - return ColdCountThreshold ? 
ColdCountThreshold.getValue() : 0; + return ColdCountThreshold.getValueOr(0); } private: diff --git a/llvm/include/llvm/Analysis/ReplayInlineAdvisor.h b/llvm/include/llvm/Analysis/ReplayInlineAdvisor.h index 3018bcc241d8..a0eb9af62205 100644 --- a/llvm/include/llvm/Analysis/ReplayInlineAdvisor.h +++ b/llvm/include/llvm/Analysis/ReplayInlineAdvisor.h @@ -20,6 +20,46 @@ class Function; class Module; class OptimizationRemarkEmitter; +struct CallSiteFormat { + enum class Format : int { + Line, + LineColumn, + LineDiscriminator, + LineColumnDiscriminator + }; + + bool outputColumn() const { + return OutputFormat == Format::LineColumn || + OutputFormat == Format::LineColumnDiscriminator; + } + + bool outputDiscriminator() const { + return OutputFormat == Format::LineDiscriminator || + OutputFormat == Format::LineColumnDiscriminator; + } + + Format OutputFormat; +}; + +/// Replay Inliner Setup +struct ReplayInlinerSettings { + enum class Scope : int { Function, Module }; + enum class Fallback : int { Original, AlwaysInline, NeverInline }; + + StringRef ReplayFile; + Scope ReplayScope; + Fallback ReplayFallback; + CallSiteFormat ReplayFormat; +}; + +/// Get call site location as a string with the given format +std::string formatCallSiteLocation(DebugLoc DLoc, const CallSiteFormat &Format); + +std::unique_ptr getReplayInlineAdvisor( + Module &M, FunctionAnalysisManager &FAM, LLVMContext &Context, + std::unique_ptr OriginalAdvisor, + const ReplayInlinerSettings &ReplaySettings, bool EmitRemarks); + /// Replay inline advisor that uses optimization remarks from inlining of /// previous build to guide current inlining. This is useful for inliner tuning. class ReplayInlineAdvisor : public InlineAdvisor { @@ -27,15 +67,24 @@ public: ReplayInlineAdvisor(Module &M, FunctionAnalysisManager &FAM, LLVMContext &Context, std::unique_ptr OriginalAdvisor, - StringRef RemarksFile, bool EmitRemarks); + const ReplayInlinerSettings &ReplaySettings, + bool EmitRemarks); std::unique_ptr getAdviceImpl(CallBase &CB) override; bool areReplayRemarksLoaded() const { return HasReplayRemarks; } private: - StringSet<> InlineSitesFromRemarks; + bool hasInlineAdvice(Function &F) const { + return (ReplaySettings.ReplayScope == + ReplayInlinerSettings::Scope::Module) || + CallersToReplay.contains(F.getName()); + } std::unique_ptr OriginalAdvisor; bool HasReplayRemarks = false; + const ReplayInlinerSettings ReplaySettings; bool EmitRemarks = false; + + StringMap InlineSitesFromRemarks; + StringSet<> CallersToReplay; }; } // namespace llvm #endif // LLVM_ANALYSIS_REPLAYINLINEADVISOR_H diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h index ae9c73fede96..a2260688e3d6 100644 --- a/llvm/include/llvm/Analysis/ScalarEvolution.h +++ b/llvm/include/llvm/Analysis/ScalarEvolution.h @@ -25,7 +25,6 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/FoldingSet.h" -#include "llvm/ADT/Hashing.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/SetVector.h" @@ -112,6 +111,24 @@ public: /// Note that NUW and NSW are also valid properties of a recurrence, and /// either implies NW. For convenience, NW will be set for a recurrence /// whenever either NUW or NSW are set. + /// + /// We require that the flag on a SCEV apply to the entire scope in which + /// that SCEV is defined. 
A SCEV's scope is set of locations dominated by + /// a defining location, which is in turn described by the following rules: + /// * A SCEVUnknown is at the point of definition of the Value. + /// * A SCEVConstant is defined at all points. + /// * A SCEVAddRec is defined starting with the header of the associated + /// loop. + /// * All other SCEVs are defined at the earlest point all operands are + /// defined. + /// + /// The above rules describe a maximally hoisted form (without regards to + /// potential control dependence). A SCEV is defined anywhere a + /// corresponding instruction could be defined in said maximally hoisted + /// form. Note that SCEVUDivExpr (currently the only expression type which + /// can trap) can be defined per these rules in regions where it would trap + /// at runtime. A SCEV being defined does not require the existence of any + /// instruction within the defined scope. enum NoWrapFlags { FlagAnyWrap = 0, // No guarantee. FlagNW = (1 << 0), // No self-wrap. @@ -472,6 +489,10 @@ public: clearFlags(SCEV::NoWrapFlags Flags, SCEV::NoWrapFlags OffFlags) { return (SCEV::NoWrapFlags)(Flags & ~OffFlags); } + LLVM_NODISCARD static bool hasFlags(SCEV::NoWrapFlags Flags, + SCEV::NoWrapFlags TestFlags) { + return TestFlags == maskFlags(Flags, TestFlags); + }; ScalarEvolution(Function &F, TargetLibraryInfo &TLI, AssumptionCache &AC, DominatorTree &DT, LoopInfo &LI); @@ -498,13 +519,26 @@ public: // Returns a wider type among {Ty1, Ty2}. Type *getWiderType(Type *Ty1, Type *Ty2) const; + /// Return true if there exists a point in the program at which both + /// A and B could be operands to the same instruction. + /// SCEV expressions are generally assumed to correspond to instructions + /// which could exists in IR. In general, this requires that there exists + /// a use point in the program where all operands dominate the use. + /// + /// Example: + /// loop { + /// if + /// loop { v1 = load @global1; } + /// else + /// loop { v2 = load @global2; } + /// } + /// No SCEV with operand V1, and v2 can exist in this program. + bool instructionCouldExistWitthOperands(const SCEV *A, const SCEV *B); + /// Return true if the SCEV is a scAddRecExpr or it contains /// scAddRecExpr. The result will be cached in HasRecMap. bool containsAddRecurrence(const SCEV *S); - /// Erase Value from ValueExprMap and ExprValueMap. - void eraseValueFromMap(Value *V); - /// Is operation \p BinOp between \p LHS and \p RHS provably does not have /// a signed/unsigned overflow (\p Signed)? bool willNotOverflow(Instruction::BinaryOps BinOp, bool Signed, @@ -516,6 +550,12 @@ public: std::pair getStrengthenedNoWrapFlagsFromBinOp(const OverflowingBinaryOperator *OBO); + /// Notify this ScalarEvolution that \p User directly uses SCEVs in \p Ops. + void registerUser(const SCEV *User, ArrayRef Ops); + + /// Return true if the SCEV expression contains an undef value. + bool containsUndefs(const SCEV *S) const; + /// Return a SCEV expression for the full generality of the specified /// expression. const SCEV *getSCEV(Value *V); @@ -700,6 +740,9 @@ public: /// cases do exist. const SCEV *getPointerBase(const SCEV *V); + /// Compute an expression equivalent to S - getPointerBase(S). + const SCEV *removePointerBase(const SCEV *S); + /// Return a SCEV expression for the specified value at the specified scope /// in the program. The L value specifies a loop nest to evaluate the /// expression at, where null is the top-level or a specified loop is @@ -735,9 +778,13 @@ public: /// Convert from an "exit count" (i.e. 
"backedge taken count") to a "trip /// count". A "trip count" is the number of times the header of the loop /// will execute if an exit is taken after the specified number of backedges - /// have been taken. (e.g. TripCount = ExitCount + 1) A zero result - /// must be interpreted as a loop having an unknown trip count. - const SCEV *getTripCountFromExitCount(const SCEV *ExitCount); + /// have been taken. (e.g. TripCount = ExitCount + 1). Note that the + /// expression can overflow if ExitCount = UINT_MAX. \p Extend controls + /// how potential overflow is handled. If true, a wider result type is + /// returned. ex: EC = 255 (i8), TC = 256 (i9). If false, result unsigned + /// wraps with 2s-complement semantics. ex: EC = 255 (i8), TC = 0 (i8) + const SCEV *getTripCountFromExitCount(const SCEV *ExitCount, + bool Extend = true); /// Returns the exact trip count of the loop if we can compute it, and /// the result is a small constant. '0' is used to represent an unknown @@ -762,6 +809,13 @@ public: /// Returns 0 if the trip count is unknown or not constant. unsigned getSmallConstantMaxTripCount(const Loop *L); + /// Returns the upper bound of the loop trip count infered from array size. + /// Can not access bytes starting outside the statically allocated size + /// without being immediate UB. + /// Returns SCEVCouldNotCompute if the trip count could not inferred + /// from array accesses. + const SCEV *getConstantMaxTripCountFromArray(const Loop *L); + /// Returns the largest constant divisor of the trip count as a normal /// unsigned value, if possible. This means that the actual trip count is /// always a multiple of the returned value. Returns 1 if the trip count is @@ -988,14 +1042,13 @@ public: /// Test if the given expression is known to satisfy the condition described /// by Pred, LHS, and RHS in the given Context. bool isKnownPredicateAt(ICmpInst::Predicate Pred, const SCEV *LHS, - const SCEV *RHS, const Instruction *Context); + const SCEV *RHS, const Instruction *CtxI); /// Check whether the condition described by Pred, LHS, and RHS is true or /// false in the given \p Context. If we know it, return the evaluation of /// this condition. If neither is proved, return None. Optional evaluatePredicateAt(ICmpInst::Predicate Pred, const SCEV *LHS, - const SCEV *RHS, - const Instruction *Context); + const SCEV *RHS, const Instruction *CtxI); /// Test if the condition described by Pred, LHS, RHS is known to be true on /// every iteration of the loop of the recurrency LHS. @@ -1045,7 +1098,7 @@ public: getLoopInvariantExitCondDuringFirstIterations(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const Loop *L, - const Instruction *Context, + const Instruction *CtxI, const SCEV *MaxIter); /// Simplify LHS and RHS in a comparison with predicate Pred. Return true @@ -1092,110 +1145,11 @@ public: /// Return the size of an element read or written by Inst. const SCEV *getElementSize(Instruction *Inst); - /// Compute the array dimensions Sizes from the set of Terms extracted from - /// the memory access function of this SCEVAddRecExpr (second step of - /// delinearization). - void findArrayDimensions(SmallVectorImpl &Terms, - SmallVectorImpl &Sizes, - const SCEV *ElementSize); - void print(raw_ostream &OS) const; void verify() const; bool invalidate(Function &F, const PreservedAnalyses &PA, FunctionAnalysisManager::Invalidator &Inv); - /// Collect parametric terms occurring in step expressions (first step of - /// delinearization). 
- void collectParametricTerms(const SCEV *Expr, - SmallVectorImpl &Terms); - - /// Return in Subscripts the access functions for each dimension in Sizes - /// (third step of delinearization). - void computeAccessFunctions(const SCEV *Expr, - SmallVectorImpl &Subscripts, - SmallVectorImpl &Sizes); - - /// Gathers the individual index expressions from a GEP instruction. - /// - /// This function optimistically assumes the GEP references into a fixed size - /// array. If this is actually true, this function returns a list of array - /// subscript expressions in \p Subscripts and a list of integers describing - /// the size of the individual array dimensions in \p Sizes. Both lists have - /// either equal length or the size list is one element shorter in case there - /// is no known size available for the outermost array dimension. Returns true - /// if successful and false otherwise. - bool getIndexExpressionsFromGEP(const GetElementPtrInst *GEP, - SmallVectorImpl &Subscripts, - SmallVectorImpl &Sizes); - - /// Split this SCEVAddRecExpr into two vectors of SCEVs representing the - /// subscripts and sizes of an array access. - /// - /// The delinearization is a 3 step process: the first two steps compute the - /// sizes of each subscript and the third step computes the access functions - /// for the delinearized array: - /// - /// 1. Find the terms in the step functions - /// 2. Compute the array size - /// 3. Compute the access function: divide the SCEV by the array size - /// starting with the innermost dimensions found in step 2. The Quotient - /// is the SCEV to be divided in the next step of the recursion. The - /// Remainder is the subscript of the innermost dimension. Loop over all - /// array dimensions computed in step 2. - /// - /// To compute a uniform array size for several memory accesses to the same - /// object, one can collect in step 1 all the step terms for all the memory - /// accesses, and compute in step 2 a unique array shape. This guarantees - /// that the array shape will be the same across all memory accesses. - /// - /// FIXME: We could derive the result of steps 1 and 2 from a description of - /// the array shape given in metadata. - /// - /// Example: - /// - /// A[][n][m] - /// - /// for i - /// for j - /// for k - /// A[j+k][2i][5i] = - /// - /// The initial SCEV: - /// - /// A[{{{0,+,2*m+5}_i, +, n*m}_j, +, n*m}_k] - /// - /// 1. Find the different terms in the step functions: - /// -> [2*m, 5, n*m, n*m] - /// - /// 2. Compute the array size: sort and unique them - /// -> [n*m, 2*m, 5] - /// find the GCD of all the terms = 1 - /// divide by the GCD and erase constant terms - /// -> [n*m, 2*m] - /// GCD = m - /// divide by GCD -> [n, 2] - /// remove constant terms - /// -> [n] - /// size of the array is A[unknown][n][m] - /// - /// 3. Compute the access function - /// a. Divide {{{0,+,2*m+5}_i, +, n*m}_j, +, n*m}_k by the innermost size m - /// Quotient: {{{0,+,2}_i, +, n}_j, +, n}_k - /// Remainder: {{{0,+,5}_i, +, 0}_j, +, 0}_k - /// The remainder is the subscript of the innermost array dimension: [5i]. - /// - /// b. Divide Quotient: {{{0,+,2}_i, +, n}_j, +, n}_k by next outer size n - /// Quotient: {{{0,+,0}_i, +, 1}_j, +, 1}_k - /// Remainder: {{{0,+,2}_i, +, 0}_j, +, 0}_k - /// The Remainder is the subscript of the next array dimension: [2i]. - /// - /// The subscript of the outermost dimension is the Quotient: [j+k]. - /// - /// Overall, we have: A[][n][m], and the access function: A[j+k][2i][5i]. 
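For orientation, a sketch of how this member interface was typically driven (the removed declaration itself follows just below); apart from ScalarEvolution::delinearize, every name here is illustrative.

#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/ScalarEvolution.h"

// Sketch of the member-based flow: given the SCEV of one memory access and
// its element size, recover the multidimensional subscripts (e.g.
// [j+k][2i][5i] in the worked example above) and the array sizes.
static void sketchDelinearize(llvm::ScalarEvolution &SE,
                              const llvm::SCEV *AccessFn,
                              const llvm::SCEV *ElementSize) {
  llvm::SmallVector<const llvm::SCEV *, 4> Subscripts, Sizes;
  SE.delinearize(AccessFn, Subscripts, Sizes, ElementSize);
  if (Subscripts.empty() || Sizes.empty())
    return; // the access could not be delinearized; treat it as linear
  // ...Subscripts and Sizes would now feed a dependence or bounds analysis.
}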
- void delinearize(const SCEV *Expr, SmallVectorImpl &Subscripts, - SmallVectorImpl &Sizes, - const SCEV *ElementSize); - /// Return the DataLayout associated with the module this SCEV instance is /// operating on. const DataLayout &getDataLayout() const { @@ -1234,6 +1188,18 @@ public: /// Try to apply information from loop guards for \p L to \p Expr. const SCEV *applyLoopGuards(const SCEV *Expr, const Loop *L); + /// Return true if the loop has no abnormal exits. That is, if the loop + /// is not infinite, it must exit through an explicit edge in the CFG. + /// (As opposed to either a) throwing out of the function or b) entering a + /// well defined infinite loop in some callee.) + bool loopHasNoAbnormalExits(const Loop *L) { + return getLoopProperties(L).HasNoAbnormalExits; + } + + /// Return true if this loop is finite by assumption. That is, + /// to be infinite, it must also be undefined. + bool loopIsFiniteByAssumption(const Loop *L); + private: /// A CallbackVH to arrange for ScalarEvolution to be notified whenever a /// Value is deleted. @@ -1532,15 +1498,15 @@ private: LoopDispositions; struct LoopProperties { - /// Set to true if the loop contains no instruction that can have side - /// effects (i.e. via throwing an exception, volatile or atomic access). - bool HasNoAbnormalExits; - /// Set to true if the loop contains no instruction that can abnormally exit /// the loop (i.e. via throwing an exception, by terminating the thread /// cleanly or by infinite looping in a called function). Strictly /// speaking, the last one is not leaving the loop, but is identical to /// leaving the loop for reasoning about undefined behavior. + bool HasNoAbnormalExits; + + /// Set to true if the loop contains no instruction that can have side + /// effects (i.e. via throwing an exception, volatile or atomic access). bool HasNoSideEffects; }; @@ -1554,14 +1520,6 @@ private: return getLoopProperties(L).HasNoSideEffects; } - bool loopHasNoAbnormalExits(const Loop *L) { - return getLoopProperties(L).HasNoAbnormalExits; - } - - /// Return true if this loop is finite by assumption. That is, - /// to be infinite, it must also be undefined. - bool loopIsFiniteByAssumption(const Loop *L); - /// Compute a LoopDisposition value. LoopDisposition computeLoopDisposition(const SCEV *S, const Loop *L); @@ -1574,6 +1532,9 @@ private: /// Compute a BlockDisposition value. BlockDisposition computeBlockDisposition(const SCEV *S, const BasicBlock *BB); + /// Stores all SCEV that use a given SCEV as its direct operand. + DenseMap > SCEVUsers; + /// Memoized results from getRange DenseMap UnsignedRanges; @@ -1600,22 +1561,22 @@ private: /// copied if its needed for longer. const ConstantRange &getRangeRef(const SCEV *S, RangeSignHint Hint); - /// Determines the range for the affine SCEVAddRecExpr {\p Start,+,\p Stop}. + /// Determines the range for the affine SCEVAddRecExpr {\p Start,+,\p Step}. /// Helper for \c getRange. - ConstantRange getRangeForAffineAR(const SCEV *Start, const SCEV *Stop, + ConstantRange getRangeForAffineAR(const SCEV *Start, const SCEV *Step, const SCEV *MaxBECount, unsigned BitWidth); /// Determines the range for the affine non-self-wrapping SCEVAddRecExpr {\p - /// Start,+,\p Stop}. + /// Start,+,\p Step}. 
ConstantRange getRangeForAffineNoSelfWrappingAR(const SCEVAddRecExpr *AddRec, const SCEV *MaxBECount, unsigned BitWidth, RangeSignHint SignHint); /// Try to compute a range for the affine SCEVAddRecExpr {\p Start,+,\p - /// Stop} by "factoring out" a ternary expression from the add recurrence. + /// Step} by "factoring out" a ternary expression from the add recurrence. /// Helper called by \c getRange. - ConstantRange getRangeViaFactoring(const SCEV *Start, const SCEV *Stop, + ConstantRange getRangeViaFactoring(const SCEV *Start, const SCEV *Step, const SCEV *MaxBECount, unsigned BitWidth); /// If the unknown expression U corresponds to a simple recurrence, return @@ -1761,12 +1722,6 @@ private: BasicBlock *ExitingBB, bool IsSubExpr); - /// Given an exit condition of 'icmp op load X, cst', try to see if we can - /// compute the backedge-taken count. - ExitLimit computeLoadConstantCompareExitLimit(LoadInst *LI, Constant *RHS, - const Loop *L, - ICmpInst::Predicate p); - /// Compute the exit limit of a loop that is controlled by a /// "(IV >> 1) != 0" type comparison. We cannot compute the exact trip /// count in these cases (since SCEV has no way of expressing them), but we @@ -1839,7 +1794,7 @@ private: const SCEV *RHS, ICmpInst::Predicate FoundPred, const SCEV *FoundLHS, const SCEV *FoundRHS, - const Instruction *Context); + const Instruction *CtxI); /// Test whether the condition described by Pred, LHS, and RHS is true /// whenever the condition described by FoundPred, FoundLHS, FoundRHS is @@ -1914,7 +1869,7 @@ private: const SCEV *LHS, const SCEV *RHS, const SCEV *FoundLHS, const SCEV *FoundRHS, - const Instruction *Context); + const Instruction *CtxI); /// Test whether the condition described by Pred, LHS, and RHS is true /// whenever the condition described by Pred, FoundLHS, and FoundRHS is @@ -1956,12 +1911,18 @@ private: bool splitBinaryAdd(const SCEV *Expr, const SCEV *&L, const SCEV *&R, SCEV::NoWrapFlags &Flags); - /// Drop memoized information computed for S. - void forgetMemoizedResults(const SCEV *S); + /// Drop memoized information for all \p SCEVs. + void forgetMemoizedResults(ArrayRef SCEVs); + + /// Helper for forgetMemoizedResults. + void forgetMemoizedResultsImpl(const SCEV *S); /// Return an existing SCEV for V if there is one, otherwise return nullptr. const SCEV *getExistingSCEV(Value *V); + /// Erase Value from ValueExprMap and ExprValueMap. + void eraseValueFromMap(Value *V); + /// Return false iff given SCEV contains a SCEVUnknown with NULL value- /// pointer. bool checkValidity(const SCEV *S) const; @@ -1995,6 +1956,27 @@ private: /// would trigger undefined behavior on overflow. SCEV::NoWrapFlags getNoWrapFlagsFromUB(const Value *V); + /// Return a scope which provides an upper bound on the defining scope of + /// 'S'. Specifically, return the first instruction in said bounding scope. + /// Return nullptr if the scope is trivial (function entry). + /// (See scope definition rules associated with flag discussion above) + const Instruction *getNonTrivialDefiningScopeBound(const SCEV *S); + + /// Return a scope which provides an upper bound on the defining scope for + /// a SCEV with the operands in Ops. The outparam Precise is set if the + /// bound found is a precise bound (i.e. must be the defining scope.) + const Instruction *getDefiningScopeBound(ArrayRef Ops, + bool &Precise); + + /// Wrapper around the above for cases which don't care if the bound + /// is precise. 
+ const Instruction *getDefiningScopeBound(ArrayRef Ops); + + /// Given two instructions in the same function, return true if we can + /// prove B must execute given A executes. + bool isGuaranteedToTransferExecutionTo(const Instruction *A, + const Instruction *B); + /// Return true if the SCEV corresponding to \p I is never poison. Proving /// this is more complex than proving that just \p I is never poison, since /// SCEV commons expressions across control flow, and you can have cases @@ -2036,8 +2018,11 @@ private: /// permitted by Start, End, and Stride. This is for loops of the form /// {Start, +, Stride} LT End. /// - /// Precondition: the induction variable is known to be positive. We *don't* - /// assert these preconditions so please be careful. + /// Preconditions: + /// * the induction variable is known to be positive. + /// * the induction variable is assumed not to overflow (i.e. either it + /// actually doesn't, or we'd have to immediately execute UB) + /// We *don't* assert these preconditions so please be careful. const SCEV *computeMaxBECountForLT(const SCEV *Start, const SCEV *Stride, const SCEV *End, unsigned BitWidth, bool IsSigned); @@ -2072,31 +2057,20 @@ private: /// an add rec on said loop. void getUsedLoops(const SCEV *S, SmallPtrSetImpl &LoopsUsed); - /// Find all of the loops transitively used in \p S, and update \c LoopUsers - /// accordingly. - void addToLoopUseLists(const SCEV *S); - /// Try to match the pattern generated by getURemExpr(A, B). If successful, /// Assign A and B to LHS and RHS, respectively. bool matchURem(const SCEV *Expr, const SCEV *&LHS, const SCEV *&RHS); /// Look for a SCEV expression with type `SCEVType` and operands `Ops` in - /// `UniqueSCEVs`. - /// - /// The first component of the returned tuple is the SCEV if found and null - /// otherwise. The second component is the `FoldingSetNodeID` that was - /// constructed to look up the SCEV and the third component is the insertion - /// point. - std::tuple - findExistingSCEVInCache(SCEVTypes SCEVType, ArrayRef Ops); + /// `UniqueSCEVs`. Return if found, else nullptr. + SCEV *findExistingSCEVInCache(SCEVTypes SCEVType, ArrayRef Ops); FoldingSet UniqueSCEVs; FoldingSet UniquePreds; BumpPtrAllocator SCEVAllocator; - /// This maps loops to a list of SCEV expressions that (transitively) use said - /// loop. - DenseMap> LoopUsers; + /// This maps loops to a list of addrecs that directly use said loop. + DenseMap> LoopUsers; /// Cache tentative mappings from UnknownSCEVs in a Loop, to a SCEV expression /// they can be rewritten into under certain predicates. 
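Alongside the narrowed LoopUsers map, the hunk above adds a SCEVUsers map together with registerUser(...) and the ArrayRef-based forgetMemoizedResults(...). Below is a self-contained toy sketch of the underlying idea, deliberately written without LLVM types; the real logic lives in ScalarEvolution.cpp and is not shown here.

#include <unordered_map>
#include <unordered_set>
#include <vector>

// Toy model: every expression records its direct users, so forgetting one
// expression invalidates exactly its transitive users instead of rescanning
// every memoization cache.
struct Expr; // stand-in for const SCEV *

struct UserTracker {
  std::unordered_map<const Expr *, std::unordered_set<const Expr *>> Users;

  // Record that User was built from the operands Ops (cf. registerUser).
  void registerUser(const Expr *User, const std::vector<const Expr *> &Ops) {
    for (const Expr *Op : Ops)
      Users[Op].insert(User);
  }

  // Forget Root and everything that transitively uses it
  // (cf. forgetMemoizedResults / forgetMemoizedResultsImpl).
  void forget(const Expr *Root, std::unordered_set<const Expr *> &Seen) {
    if (!Seen.insert(Root).second)
      return;
    // ...drop Root from the individual memoization maps here...
    for (const Expr *U : Users[Root])
      forget(U, Seen);
  }
};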
diff --git a/llvm/include/llvm/Analysis/StackLifetime.h b/llvm/include/llvm/Analysis/StackLifetime.h index df342a9533ee..239aec4e258b 100644 --- a/llvm/include/llvm/Analysis/StackLifetime.h +++ b/llvm/include/llvm/Analysis/StackLifetime.h @@ -191,6 +191,8 @@ public: StackLifetimePrinterPass(raw_ostream &OS, StackLifetime::LivenessType Type) : Type(Type), OS(OS) {} PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + void printPipeline(raw_ostream &OS, + function_ref MapClassName2PassName); }; } // end namespace llvm diff --git a/llvm/include/llvm/Analysis/StackSafetyAnalysis.h b/llvm/include/llvm/Analysis/StackSafetyAnalysis.h index 59c1e3e3bd56..751735f3e59f 100644 --- a/llvm/include/llvm/Analysis/StackSafetyAnalysis.h +++ b/llvm/include/llvm/Analysis/StackSafetyAnalysis.h @@ -75,7 +75,15 @@ public: StackSafetyGlobalInfo &operator=(StackSafetyGlobalInfo &&); ~StackSafetyGlobalInfo(); + // Whether we can prove that all accesses to this Alloca are in-range and + // during its lifetime. bool isSafe(const AllocaInst &AI) const; + + // Returns true if the instruction can be proven to do only two types of + // memory accesses: + // (1) live stack locations in-bounds or + // (2) non-stack locations. + bool stackAccessIsSafe(const Instruction &I) const; void print(raw_ostream &O) const; void dump() const; }; diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.h b/llvm/include/llvm/Analysis/TargetLibraryInfo.h index 22bfeda0efd0..6e3e1380535e 100644 --- a/llvm/include/llvm/Analysis/TargetLibraryInfo.h +++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.h @@ -76,7 +76,7 @@ class TargetLibraryInfoImpl { /// Return true if the function type FTy is valid for the library function /// F, regardless of whether the function is available. bool isValidProtoForLibFunc(const FunctionType &FTy, LibFunc F, - const DataLayout *DL) const; + const Module &M) const; public: /// List of known vector-functions libraries. @@ -115,6 +115,8 @@ public: /// /// If it is one of the known library functions, return true and set F to the /// corresponding value. + /// + /// FDecl is assumed to have a parent Module when using this function. bool getLibFunc(const Function &FDecl, LibFunc &F) const; /// Forces a function to be marked as unavailable. @@ -238,7 +240,7 @@ public: else { // Disable individual libc/libm calls in TargetLibraryInfo. 
LibFunc LF; - AttributeSet FnAttrs = (*F)->getAttributes().getFnAttributes(); + AttributeSet FnAttrs = (*F)->getAttributes().getFnAttrs(); for (const Attribute &Attr : FnAttrs) { if (!Attr.isStringAttribute()) continue; diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index 628058142e48..170d6b8f35ff 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -21,7 +21,6 @@ #ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H #define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H -#include "llvm/Analysis/IVDescriptors.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Operator.h" #include "llvm/IR/PassManager.h" @@ -31,6 +30,7 @@ #include "llvm/Support/DataTypes.h" #include "llvm/Support/InstructionCost.h" #include +#include namespace llvm { @@ -47,12 +47,14 @@ class ExtractElementInst; class Function; class GlobalValue; class InstCombiner; +class OptimizationRemarkEmitter; class IntrinsicInst; class LoadInst; class LoopAccessInfo; class Loop; class LoopInfo; class ProfileSummaryInfo; +class RecurrenceDescriptor; class SCEV; class ScalarEvolution; class StoreInst; @@ -97,7 +99,7 @@ struct HardwareLoopInfo { Loop *L = nullptr; BasicBlock *ExitBlock = nullptr; BranchInst *ExitBranch = nullptr; - const SCEV *TripCount = nullptr; + const SCEV *ExitCount = nullptr; IntegerType *CountType = nullptr; Value *LoopDecrement = nullptr; // Decrement the loop counter by this // value in every iteration. @@ -382,8 +384,15 @@ public: bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const; + /// Return true if globals in this address space can have initializers other + /// than `undef`. + bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const; + unsigned getAssumedAddrSpace(const Value *V) const; + std::pair + getPredicatedAddrSpace(const Value *V) const; + /// Rewrite intrinsic call \p II such that \p OldV will be replaced with \p /// NewV, which has a different address space. This should happen for every /// operand index that collectFlatAddressOperands returned for the intrinsic. @@ -506,7 +515,8 @@ public: /// transformation. The caller will initialize UP with the current /// target-independent defaults. void getUnrollingPreferences(Loop *L, ScalarEvolution &, - UnrollingPreferences &UP) const; + UnrollingPreferences &UP, + OptimizationRemarkEmitter *ORE) const; /// Query the target whether it would be profitable to convert the given loop /// into a hardware loop. @@ -660,6 +670,9 @@ public: /// Return true if the target supports masked expand load. bool isLegalMaskedExpandLoad(Type *DataType) const; + /// Return true if we should be enabling ordered reductions for the target. + bool enableOrderedReductions() const; + /// Return true if the target has a unified operation to calculate division /// and remainder. If so, the additional implicit multiplication and /// subtraction required to calculate a remainder from division are free. This @@ -907,6 +920,9 @@ public: /// architectural maximum vector length, and None otherwise. Optional getMaxVScale() const; + /// \return the value of vscale to tune the cost model for. + Optional getVScaleForTuning() const; + /// \return True if the vectorization factor should be chosen to /// make the vector of the smallest element type match the size of a /// vector register. For wider element types, this could result in @@ -1094,8 +1110,8 @@ public: /// is using a compare with the specified predicate as condition. 
When vector /// types are passed, \p VecPred must be used for all lanes. InstructionCost - getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy = nullptr, - CmpInst::Predicate VecPred = CmpInst::BAD_ICMP_PREDICATE, + getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, + CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, const Instruction *I = nullptr) const; @@ -1104,6 +1120,16 @@ public: InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index = -1) const; + /// \return The cost of replication shuffle of \p VF elements typed \p EltTy + /// \p ReplicationFactor times. + /// + /// For example, the mask for \p ReplicationFactor=3 and \p VF=4 is: + /// <0,0,0,1,1,1,2,2,2,3,3,3> + InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, + int VF, + const APInt &DemandedDstElts, + TTI::TargetCostKind CostKind); + /// \return The cost of Load and Store instructions. InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, @@ -1452,13 +1478,18 @@ public: virtual bool collectFlatAddressOperands(SmallVectorImpl &OpIndexes, Intrinsic::ID IID) const = 0; virtual bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const = 0; + virtual bool + canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const = 0; virtual unsigned getAssumedAddrSpace(const Value *V) const = 0; + virtual std::pair + getPredicatedAddrSpace(const Value *V) const = 0; virtual Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, Value *NewV) const = 0; virtual bool isLoweredToCall(const Function *F) = 0; virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &, - UnrollingPreferences &UP) = 0; + UnrollingPreferences &UP, + OptimizationRemarkEmitter *ORE) = 0; virtual void getPeelingPreferences(Loop *L, ScalarEvolution &SE, PeelingPreferences &PP) = 0; virtual bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, @@ -1505,6 +1536,7 @@ public: virtual bool isLegalMaskedGather(Type *DataType, Align Alignment) = 0; virtual bool isLegalMaskedCompressStore(Type *DataType) = 0; virtual bool isLegalMaskedExpandLoad(Type *DataType) = 0; + virtual bool enableOrderedReductions() = 0; virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0; virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0; virtual bool prefersVectorizedAddressing() = 0; @@ -1563,6 +1595,7 @@ public: virtual TypeSize getRegisterBitWidth(RegisterKind K) const = 0; virtual unsigned getMinVectorRegisterBitWidth() const = 0; virtual Optional getMaxVScale() const = 0; + virtual Optional getVScaleForTuning() const = 0; virtual bool shouldMaximizeVectorBandwidth() const = 0; virtual ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const = 0; @@ -1623,6 +1656,12 @@ public: const Instruction *I) = 0; virtual InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) = 0; + + virtual InstructionCost + getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, + const APInt &DemandedDstElts, + TTI::TargetCostKind CostKind) = 0; + virtual InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, @@ -1730,8 +1769,8 @@ public: InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef Operands, - enum TargetTransformInfo::TargetCostKind CostKind) override { - return Impl.getGEPCost(PointeeType, Ptr, Operands); + TargetTransformInfo::TargetCostKind CostKind) override { + return Impl.getGEPCost(PointeeType, Ptr, 
Operands, CostKind); } unsigned getInliningThresholdMultiplier() override { return Impl.getInliningThresholdMultiplier(); @@ -1775,10 +1814,20 @@ public: return Impl.isNoopAddrSpaceCast(FromAS, ToAS); } + bool + canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const override { + return Impl.canHaveNonUndefGlobalInitializerInAddressSpace(AS); + } + unsigned getAssumedAddrSpace(const Value *V) const override { return Impl.getAssumedAddrSpace(V); } + std::pair + getPredicatedAddrSpace(const Value *V) const override { + return Impl.getPredicatedAddrSpace(V); + } + Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, Value *NewV) const override { return Impl.rewriteIntrinsicWithAddressSpace(II, OldV, NewV); @@ -1788,8 +1837,9 @@ public: return Impl.isLoweredToCall(F); } void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, - UnrollingPreferences &UP) override { - return Impl.getUnrollingPreferences(L, SE, UP); + UnrollingPreferences &UP, + OptimizationRemarkEmitter *ORE) override { + return Impl.getUnrollingPreferences(L, SE, UP, ORE); } void getPeelingPreferences(Loop *L, ScalarEvolution &SE, PeelingPreferences &PP) override { @@ -1886,6 +1936,9 @@ public: bool isLegalMaskedExpandLoad(Type *DataType) override { return Impl.isLegalMaskedExpandLoad(DataType); } + bool enableOrderedReductions() override { + return Impl.enableOrderedReductions(); + } bool hasDivRemOp(Type *DataType, bool IsSigned) override { return Impl.hasDivRemOp(DataType, IsSigned); } @@ -2015,6 +2068,9 @@ public: Optional getMaxVScale() const override { return Impl.getMaxVScale(); } + Optional getVScaleForTuning() const override { + return Impl.getVScaleForTuning(); + } bool shouldMaximizeVectorBandwidth() const override { return Impl.shouldMaximizeVectorBandwidth(); } @@ -2115,6 +2171,13 @@ public: unsigned Index) override { return Impl.getVectorInstrCost(Opcode, Val, Index); } + InstructionCost + getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, + const APInt &DemandedDstElts, + TTI::TargetCostKind CostKind) override { + return Impl.getReplicationShuffleCost(EltTy, ReplicationFactor, VF, + DemandedDstElts, CostKind); + } InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index c07a33c9f155..05ef2495475f 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -24,6 +24,7 @@ #include "llvm/IR/Operator.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/Type.h" +#include using namespace llvm::PatternMatch; @@ -47,10 +48,9 @@ public: const DataLayout &getDataLayout() const { return DL; } - InstructionCost - getGEPCost(Type *PointeeType, const Value *Ptr, - ArrayRef Operands, - TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) const { + InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, + ArrayRef Operands, + TTI::TargetCostKind CostKind) const { // In the basic model, we just assume that all-constant GEPs will be folded // into their uses via addressing modes. 
for (unsigned Idx = 0, Size = Operands.size(); Idx != Size; ++Idx) @@ -105,9 +105,17 @@ public: } bool isNoopAddrSpaceCast(unsigned, unsigned) const { return false; } + bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const { + return AS == 0; + }; unsigned getAssumedAddrSpace(const Value *V) const { return -1; } + std::pair + getPredicatedAddrSpace(const Value *V) const { + return std::make_pair(nullptr, -1); + } + Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, Value *NewV) const { return nullptr; @@ -187,7 +195,8 @@ public: } void getUnrollingPreferences(Loop *, ScalarEvolution &, - TTI::UnrollingPreferences &) const {} + TTI::UnrollingPreferences &, + OptimizationRemarkEmitter *) const {} void getPeelingPreferences(Loop *, ScalarEvolution &, TTI::PeelingPreferences &) const {} @@ -262,6 +271,8 @@ public: bool isLegalMaskedExpandLoad(Type *DataType) const { return false; } + bool enableOrderedReductions() const { return false; } + bool hasDivRemOp(Type *DataType, bool IsSigned) const { return false; } bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const { @@ -394,6 +405,7 @@ public: unsigned getMinVectorRegisterBitWidth() const { return 128; } Optional getMaxVScale() const { return None; } + Optional getVScaleForTuning() const { return None; } bool shouldMaximizeVectorBandwidth() const { return false; } @@ -539,6 +551,12 @@ public: return 1; } + unsigned getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, + const APInt &DemandedDstElts, + TTI::TargetCostKind CostKind) { + return 1; + } + InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, @@ -614,7 +632,8 @@ public: return 1; } - unsigned getNumberOfParts(Type *Tp) const { return 0; } + // Assume that we have a register of the right size for the type. + unsigned getNumberOfParts(Type *Tp) const { return 1; } InstructionCost getAddressComputationCost(Type *Tp, ScalarEvolution *, const SCEV *) const { @@ -632,9 +651,10 @@ public: return 1; } - InstructionCost getExtendedAddReductionCost( - bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *Ty, - TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const { + InstructionCost + getExtendedAddReductionCost(bool IsMLA, bool IsUnsigned, Type *ResTy, + VectorType *Ty, + TTI::TargetCostKind CostKind) const { return 1; } @@ -856,10 +876,9 @@ protected: public: using BaseT::getGEPCost; - InstructionCost - getGEPCost(Type *PointeeType, const Value *Ptr, - ArrayRef Operands, - TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) { + InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, + ArrayRef Operands, + TTI::TargetCostKind CostKind) { assert(PointeeType && Ptr && "can't get GEPCost of nullptr"); assert(cast(Ptr->getType()->getScalarType()) ->isOpaqueOrPointeeTypeMatches(PointeeType) && @@ -964,10 +983,10 @@ public: return TTI::TCC_Free; break; case Instruction::GetElementPtr: { - const GEPOperator *GEP = cast(U); + const auto *GEP = cast(U); return TargetTTI->getGEPCost(GEP->getSourceElementType(), GEP->getPointerOperand(), - Operands.drop_front()); + Operands.drop_front(), CostKind); } case Instruction::Add: case Instruction::FAdd: @@ -1063,58 +1082,94 @@ public: auto *IE = dyn_cast(U); if (!IE) return TTI::TCC_Basic; // FIXME - auto *CI = dyn_cast(IE->getOperand(2)); - unsigned Idx = CI ? 
CI->getZExtValue() : -1; + unsigned Idx = -1; + if (auto *CI = dyn_cast(IE->getOperand(2))) + if (CI->getValue().getActiveBits() <= 32) + Idx = CI->getZExtValue(); return TargetTTI->getVectorInstrCost(Opcode, Ty, Idx); } case Instruction::ShuffleVector: { auto *Shuffle = dyn_cast(U); if (!Shuffle) return TTI::TCC_Basic; // FIXME + auto *VecTy = cast(U->getType()); auto *VecSrcTy = cast(U->getOperand(0)->getType()); + int NumSubElts, SubIndex; + + if (Shuffle->changesLength()) { + // Treat a 'subvector widening' as a free shuffle. + if (Shuffle->increasesLength() && Shuffle->isIdentityWithPadding()) + return 0; + + if (Shuffle->isExtractSubvectorMask(SubIndex)) + return TargetTTI->getShuffleCost(TTI::SK_ExtractSubvector, VecSrcTy, + Shuffle->getShuffleMask(), SubIndex, + VecTy); + + if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex)) + return TargetTTI->getShuffleCost( + TTI::SK_InsertSubvector, VecTy, Shuffle->getShuffleMask(), + SubIndex, + FixedVectorType::get(VecTy->getScalarType(), NumSubElts)); + + int ReplicationFactor, VF; + if (Shuffle->isReplicationMask(ReplicationFactor, VF)) { + APInt DemandedDstElts = + APInt::getNullValue(Shuffle->getShuffleMask().size()); + for (auto I : enumerate(Shuffle->getShuffleMask())) { + if (I.value() != UndefMaskElem) + DemandedDstElts.setBit(I.index()); + } + return TargetTTI->getReplicationShuffleCost( + VecSrcTy->getElementType(), ReplicationFactor, VF, + DemandedDstElts, CostKind); + } - // TODO: Identify and add costs for insert subvector, etc. - int SubIndex; - if (Shuffle->isExtractSubvectorMask(SubIndex)) - return TargetTTI->getShuffleCost(TTI::SK_ExtractSubvector, VecSrcTy, - Shuffle->getShuffleMask(), SubIndex, - VecTy); - else if (Shuffle->changesLength()) return CostKind == TTI::TCK_RecipThroughput ? 
-1 : 1; - else if (Shuffle->isIdentity()) + } + + if (Shuffle->isIdentity()) return 0; - else if (Shuffle->isReverse()) + + if (Shuffle->isReverse()) return TargetTTI->getShuffleCost(TTI::SK_Reverse, VecTy, Shuffle->getShuffleMask(), 0, nullptr); - else if (Shuffle->isSelect()) + + if (Shuffle->isSelect()) return TargetTTI->getShuffleCost(TTI::SK_Select, VecTy, Shuffle->getShuffleMask(), 0, nullptr); - else if (Shuffle->isTranspose()) + + if (Shuffle->isTranspose()) return TargetTTI->getShuffleCost(TTI::SK_Transpose, VecTy, Shuffle->getShuffleMask(), 0, nullptr); - else if (Shuffle->isZeroEltSplat()) + + if (Shuffle->isZeroEltSplat()) return TargetTTI->getShuffleCost(TTI::SK_Broadcast, VecTy, Shuffle->getShuffleMask(), 0, nullptr); - else if (Shuffle->isSingleSource()) + + if (Shuffle->isSingleSource()) return TargetTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, VecTy, Shuffle->getShuffleMask(), 0, nullptr); + if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex)) + return TargetTTI->getShuffleCost( + TTI::SK_InsertSubvector, VecTy, Shuffle->getShuffleMask(), SubIndex, + FixedVectorType::get(VecTy->getScalarType(), NumSubElts)); + return TargetTTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy, Shuffle->getShuffleMask(), 0, nullptr); } case Instruction::ExtractElement: { - unsigned Idx = -1; auto *EEI = dyn_cast(U); if (!EEI) return TTI::TCC_Basic; // FIXME - - auto *CI = dyn_cast(EEI->getOperand(1)); - if (CI) - Idx = CI->getZExtValue(); - - return TargetTTI->getVectorInstrCost(Opcode, U->getOperand(0)->getType(), - Idx); + unsigned Idx = -1; + if (auto *CI = dyn_cast(EEI->getOperand(1))) + if (CI->getValue().getActiveBits() <= 32) + Idx = CI->getZExtValue(); + Type *DstTy = U->getOperand(0)->getType(); + return TargetTTI->getVectorInstrCost(Opcode, DstTy, Idx); } } // By default, just classify everything as 'basic'. diff --git a/llvm/include/llvm/Analysis/TypeMetadataUtils.h b/llvm/include/llvm/Analysis/TypeMetadataUtils.h index 3f7603142900..074c40942b06 100644 --- a/llvm/include/llvm/Analysis/TypeMetadataUtils.h +++ b/llvm/include/llvm/Analysis/TypeMetadataUtils.h @@ -22,6 +22,7 @@ namespace llvm { class CallBase; class CallInst; class Constant; +class Function; class DominatorTree; class Instruction; class Module; @@ -56,7 +57,30 @@ void findDevirtualizableCallsForTypeCheckedLoad( SmallVectorImpl &Preds, bool &HasNonCallUses, const CallInst *CI, DominatorTree &DT); -Constant *getPointerAtOffset(Constant *I, uint64_t Offset, Module &M); -} +/// Processes a Constant recursively looking into elements of arrays, structs +/// and expressions to find a trivial pointer element that is located at the +/// given offset (relative to the beginning of the whole outer Constant). +/// +/// Used for example from GlobalDCE to find an entry in a C++ vtable that +/// matches a vcall offset. +/// +/// To support Swift vtables, getPointerAtOffset can see through "relative +/// pointers", i.e. (sub-)expressions of the form of: +/// +/// @symbol = ... { +/// i32 trunc (i64 sub ( +/// i64 ptrtoint ( @target to i64), i64 ptrtoint (... @symbol to i64) +/// ) to i32) +/// } +/// +/// For such (sub-)expressions, getPointerAtOffset returns the @target pointer. +Constant *getPointerAtOffset(Constant *I, uint64_t Offset, Module &M, + Constant *TopLevelGlobal = nullptr); + +/// Finds the same "relative pointer" pattern as described above, where the +/// target is `F`, and replaces the entire pattern with a constant zero. 
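A brief sketch of how the two helpers are meant to combine (the declaration of replaceRelativePointerUsersWithZero follows directly below); only the two TypeMetadataUtils functions are real, the surrounding names are illustrative.

#include "llvm/Analysis/TypeMetadataUtils.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Casting.h"
#include <cstdint>

// Sketch: resolve the vtable slot at a byte offset; if the callee it names
// is known to be dead, clear the relative-pointer entries that mention it.
static void dropSlotIfDead(llvm::GlobalVariable &VTable, uint64_t Offset,
                           llvm::Module &M, bool SlotIsDead) {
  if (!VTable.hasInitializer())
    return;
  llvm::Constant *Target =
      llvm::getPointerAtOffset(VTable.getInitializer(), Offset, M, &VTable);
  auto *F = llvm::dyn_cast_or_null<llvm::Function>(Target);
  if (!F || !SlotIsDead)
    return;
  // Rewrites each `trunc (sub (ptrtoint @F, ...))` pattern feeding a vtable
  // to a literal zero, so a later pass can drop F without dangling entries.
  llvm::replaceRelativePointerUsersWithZero(F);
}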
+void replaceRelativePointerUsersWithZero(Function *F); + +} // namespace llvm #endif diff --git a/llvm/include/llvm/Analysis/Utils/TFUtils.h b/llvm/include/llvm/Analysis/Utils/TFUtils.h index 47ee23e06000..1f6be0e60eb9 100644 --- a/llvm/include/llvm/Analysis/Utils/TFUtils.h +++ b/llvm/include/llvm/Analysis/Utils/TFUtils.h @@ -104,6 +104,9 @@ Optional getTensorSpecFromJSON(LLVMContext &Ctx, struct LoggedFeatureSpec { TensorSpec Spec; Optional LoggingName; + const std::string &getLoggingName() const { + return LoggingName ? *LoggingName : Spec.name(); + } }; /// Load the output specs. If SpecFileOverride is not empty, that path is used. @@ -170,7 +173,9 @@ public: // we can consider using bytes. char *addEntryAndGetFloatOrInt64Buffer(size_t FeatureID); - void print(raw_ostream &OS); + // Flush the content of the log to the stream, clearing the stored data in the + // process. + void flush(raw_ostream &OS); private: std::vector FeatureSpecs; diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h index 90ec742f18e6..b4f38a3e976f 100644 --- a/llvm/include/llvm/Analysis/ValueTracking.h +++ b/llvm/include/llvm/Analysis/ValueTracking.h @@ -203,6 +203,15 @@ constexpr unsigned MaxAnalysisRecursionDepth = 6; const DominatorTree *DT = nullptr, bool UseInstrInfo = true); + /// Get the minimum bit size for this Value \p Op as a signed integer. + /// i.e. x == sext(trunc(x to MinSignedBits) to bitwidth(x)). + /// Similar to the APInt::getMinSignedBits function. + unsigned ComputeMinSignedBits(const Value *Op, const DataLayout &DL, + unsigned Depth = 0, + AssumptionCache *AC = nullptr, + const Instruction *CxtI = nullptr, + const DominatorTree *DT = nullptr); + /// This function computes the integer multiple of Base that equals V. If /// successful, it returns true and returns the multiple in Multiple. If /// unsuccessful, it returns false. Also, if V can be simplified to an @@ -549,6 +558,7 @@ constexpr unsigned MaxAnalysisRecursionDepth = 6; ConstantRange computeConstantRange(const Value *V, bool UseInstrInfo = true, AssumptionCache *AC = nullptr, const Instruction *CtxI = nullptr, + const DominatorTree *DT = nullptr, unsigned Depth = 0); /// Return true if this function can prove that the instruction I will @@ -573,6 +583,18 @@ constexpr unsigned MaxAnalysisRecursionDepth = 6; /// instruction variant of this function. bool isGuaranteedToTransferExecutionToSuccessor(const BasicBlock *BB); + /// Return true if every instruction in the range (Begin, End) is + /// guaranteed to transfer execution to its static successor. \p ScanLimit + /// bounds the search to avoid scanning huge blocks. + bool isGuaranteedToTransferExecutionToSuccessor( + BasicBlock::const_iterator Begin, BasicBlock::const_iterator End, + unsigned ScanLimit = 32); + + /// Same as previous, but with range expressed via iterator_range. + bool isGuaranteedToTransferExecutionToSuccessor( + iterator_range Range, + unsigned ScanLimit = 32); + /// Return true if this function can prove that the instruction I /// is executed for every iteration of the loop L. /// @@ -624,10 +646,16 @@ constexpr unsigned MaxAnalysisRecursionDepth = 6; /// true. If Op raises immediate UB but never creates poison or undef /// (e.g. sdiv I, 0), canCreatePoison returns false. /// + /// \p ConsiderFlags controls whether poison producing flags on the + /// instruction are considered. 
This can be used to see if the instruction + /// could still introduce undef or poison even without poison generating flags + /// which might be on the instruction. (i.e. could the result of + /// Op->dropPoisonGeneratingFlags() still create poison or undef) + /// /// canCreatePoison returns true if Op can create poison from non-poison /// operands. - bool canCreateUndefOrPoison(const Operator *Op); - bool canCreatePoison(const Operator *Op); + bool canCreateUndefOrPoison(const Operator *Op, bool ConsiderFlags = true); + bool canCreatePoison(const Operator *Op, bool ConsiderFlags = true); /// Return true if V is poison given that ValAssumedPoison is already poison. /// For example, if ValAssumedPoison is `icmp X, 10` and V is `icmp X, 5`, @@ -744,6 +772,10 @@ constexpr unsigned MaxAnalysisRecursionDepth = 6; /// minimum/maximum flavor. CmpInst::Predicate getInverseMinMaxPred(SelectPatternFlavor SPF); + /// Return the minimum or maximum constant value for the specified integer + /// min/max flavor and type. + APInt getMinMaxLimit(SelectPatternFlavor SPF, unsigned BitWidth); + /// Check if the values in \p VL are select instructions that can be converted /// to a min or max (vector) intrinsic. Returns the intrinsic ID, if such a /// conversion is possible, together with a bool indicating whether all select diff --git a/llvm/include/llvm/Analysis/VectorUtils.h b/llvm/include/llvm/Analysis/VectorUtils.h index c890216c9e01..24e2318de48b 100644 --- a/llvm/include/llvm/Analysis/VectorUtils.h +++ b/llvm/include/llvm/Analysis/VectorUtils.h @@ -533,6 +533,12 @@ llvm::SmallVector createStrideMask(unsigned Start, unsigned Stride, llvm::SmallVector createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs); +/// Given a shuffle mask for a binary shuffle, create the equivalent shuffle +/// mask assuming both operands are identical. This assumes that the unary +/// shuffle will use elements from operand 0 (operand 1 will be unused). +llvm::SmallVector createUnaryMask(ArrayRef Mask, + unsigned NumElts); + /// Concatenate a list of vectors. /// /// This function generates code that concatenate the vectors in \p Vecs into a @@ -686,10 +692,8 @@ public: if (getMember(getFactor() - 1)) return false; - // We have a group with gaps. It therefore cannot be a group of stores, - // and it can't be a reversed access, because such groups get invalidated. - assert(!getMember(0)->mayWriteToMemory() && - "Group should have been invalidated"); + // We have a group with gaps. It therefore can't be a reversed access, + // because such groups get invalidated (TODO). 
assert(!isReverse() && "Group should have been invalidated"); // This is a group of loads, with gaps, and without a last-member diff --git a/llvm/include/llvm/AsmParser/LLLexer.h b/llvm/include/llvm/AsmParser/LLLexer.h index c97d9781c33b..c30165e4a97b 100644 --- a/llvm/include/llvm/AsmParser/LLLexer.h +++ b/llvm/include/llvm/AsmParser/LLLexer.h @@ -10,8 +10,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_LIB_ASMPARSER_LLLEXER_H -#define LLVM_LIB_ASMPARSER_LLLEXER_H +#ifndef LLVM_ASMPARSER_LLLEXER_H +#define LLVM_ASMPARSER_LLLEXER_H #include "LLToken.h" #include "llvm/ADT/APFloat.h" diff --git a/llvm/include/llvm/AsmParser/LLParser.h b/llvm/include/llvm/AsmParser/LLParser.h index 70db9218fa3d..d621c232378c 100644 --- a/llvm/include/llvm/AsmParser/LLParser.h +++ b/llvm/include/llvm/AsmParser/LLParser.h @@ -10,8 +10,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_LIB_ASMPARSER_LLPARSER_H -#define LLVM_LIB_ASMPARSER_LLPARSER_H +#ifndef LLVM_ASMPARSER_LLPARSER_H +#define LLVM_ASMPARSER_LLPARSER_H #include "LLLexer.h" #include "llvm/ADT/Optional.h" @@ -172,9 +172,8 @@ namespace llvm { /// getGlobalVal - Get a value with the specified name or ID, creating a /// forward reference record if needed. This can return null if the value /// exists but does not have the right type. - GlobalValue *getGlobalVal(const std::string &N, Type *Ty, LocTy Loc, - bool IsCall); - GlobalValue *getGlobalVal(unsigned ID, Type *Ty, LocTy Loc, bool IsCall); + GlobalValue *getGlobalVal(const std::string &N, Type *Ty, LocTy Loc); + GlobalValue *getGlobalVal(unsigned ID, Type *Ty, LocTy Loc); /// Get a Comdat with the specified name, creating a forward reference /// record if needed. @@ -270,7 +269,6 @@ namespace llvm { bool parseOptionalCommaAlign(MaybeAlign &Alignment, bool &AteExtraComma); bool parseOptionalCommaAddrSpace(unsigned &AddrSpace, LocTy &Loc, bool &AteExtraComma); - bool parseOptionalCommaInAlloca(bool &IsInAlloca); bool parseAllocSizeArguments(unsigned &BaseSizeArg, Optional &HowManyArg); bool parseVScaleRangeArguments(unsigned &MinValue, unsigned &MaxValue); @@ -306,11 +304,10 @@ namespace llvm { unsigned DLLStorageClass, bool DSOLocal, GlobalVariable::ThreadLocalMode TLM, GlobalVariable::UnnamedAddr UnnamedAddr); - bool parseIndirectSymbol(const std::string &Name, LocTy NameLoc, - unsigned L, unsigned Visibility, - unsigned DLLStorageClass, bool DSOLocal, - GlobalVariable::ThreadLocalMode TLM, - GlobalVariable::UnnamedAddr UnnamedAddr); + bool parseAliasOrIFunc(const std::string &Name, LocTy NameLoc, unsigned L, + unsigned Visibility, unsigned DLLStorageClass, + bool DSOLocal, GlobalVariable::ThreadLocalMode TLM, + GlobalVariable::UnnamedAddr UnnamedAddr); bool parseComdat(); bool parseStandaloneMetadata(); bool parseNamedMetadata(); @@ -424,8 +421,8 @@ namespace llvm { /// GetVal - Get a value with the specified name or ID, creating a /// forward reference record if needed. This can return null if the value /// exists but does not have the right type. - Value *getVal(const std::string &Name, Type *Ty, LocTy Loc, bool IsCall); - Value *getVal(unsigned ID, Type *Ty, LocTy Loc, bool IsCall); + Value *getVal(const std::string &Name, Type *Ty, LocTy Loc); + Value *getVal(unsigned ID, Type *Ty, LocTy Loc); /// setInstName - After an instruction is parsed and inserted into its /// basic block, this installs its name. 
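The ComputeMinSignedBits helper added to ValueTracking.h above documents the invariant x == sext(trunc(x to MinSignedBits) to bitwidth(x)) and is described as the Value-level counterpart of APInt::getMinSignedBits. The following is a minimal standalone sketch of that invariant at the APInt level only; it assumes nothing beyond the public APInt interface, and the concrete value and the bare main() are illustrative rather than taken from the patch:

#include "llvm/ADT/APInt.h"
#include <cassert>
#include <cstdint>

using namespace llvm;

int main() {
  // -100 needs 8 bits as a signed value (a 7-bit signed integer only covers
  // -64..63), so getMinSignedBits() reports 8. The value is an arbitrary
  // example.
  APInt X(/*numBits=*/32, /*val=*/static_cast<uint64_t>(-100),
          /*isSigned=*/true);
  unsigned MinBits = X.getMinSignedBits();
  assert(MinBits == 8);
  // The documented round-trip: truncating to MinBits and sign-extending back
  // reproduces the original value exactly.
  assert(X == X.trunc(MinBits).sext(X.getBitWidth()));
  return 0;
}

The new IR-level helper reports the same quantity for a Value, which is what lets a caller narrow signed arithmetic to MinSignedBits-wide types without changing the result.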
@@ -447,10 +444,10 @@ namespace llvm { }; bool convertValIDToValue(Type *Ty, ValID &ID, Value *&V, - PerFunctionState *PFS, bool IsCall); + PerFunctionState *PFS); Value *checkValidVariableType(LocTy Loc, const Twine &Name, Type *Ty, - Value *Val, bool IsCall); + Value *Val); bool parseConstantValue(Type *Ty, Constant *&C); bool parseValue(Type *Ty, Value *&V, PerFunctionState *PFS); diff --git a/llvm/include/llvm/AsmParser/LLToken.h b/llvm/include/llvm/AsmParser/LLToken.h index aa49c68fe924..f8ca054863ac 100644 --- a/llvm/include/llvm/AsmParser/LLToken.h +++ b/llvm/include/llvm/AsmParser/LLToken.h @@ -10,8 +10,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_LIB_ASMPARSER_LLTOKEN_H -#define LLVM_LIB_ASMPARSER_LLTOKEN_H +#ifndef LLVM_ASMPARSER_LLTOKEN_H +#define LLVM_ASMPARSER_LLTOKEN_H namespace llvm { namespace lltok { @@ -190,6 +190,7 @@ enum Kind { kw_convergent, kw_dereferenceable, kw_dereferenceable_or_null, + kw_disable_sanitizer_instrumentation, kw_elementtype, kw_inaccessiblememonly, kw_inaccessiblemem_or_argmemonly, @@ -403,6 +404,9 @@ enum Kind { kw_returnDoesNotAlias, kw_noInline, kw_alwaysInline, + kw_noUnwind, + kw_mayThrow, + kw_hasUnknownCall, kw_calls, kw_callee, kw_params, diff --git a/llvm/include/llvm/BinaryFormat/Dwarf.def b/llvm/include/llvm/BinaryFormat/Dwarf.def index 34f124b5779a..61f3f27ebb47 100644 --- a/llvm/include/llvm/BinaryFormat/Dwarf.def +++ b/llvm/include/llvm/BinaryFormat/Dwarf.def @@ -248,6 +248,9 @@ HANDLE_DW_TAG(0x5103, ALTIUM_rev_carry_type, 0, ALTIUM, DW_KIND_NONE) // M16 __rom qualifier HANDLE_DW_TAG(0x5111, ALTIUM_rom, 0, ALTIUM, DW_KIND_NONE) +// LLVM +HANDLE_DW_TAG(0x6000, LLVM_annotation, 0, LLVM, DW_KIND_NONE) + // Green Hills. HANDLE_DW_TAG(0x8004, GHS_namespace, 0, GHS, DW_KIND_NONE) HANDLE_DW_TAG(0x8005, GHS_using_namespace, 0, GHS, DW_KIND_NONE) diff --git a/llvm/include/llvm/BinaryFormat/DynamicTags.def b/llvm/include/llvm/BinaryFormat/DynamicTags.def index c08f8a53bdb5..814d8b113ec4 100644 --- a/llvm/include/llvm/BinaryFormat/DynamicTags.def +++ b/llvm/include/llvm/BinaryFormat/DynamicTags.def @@ -31,6 +31,11 @@ #define PPC64_DYNAMIC_TAG_DEFINED #endif +#ifndef RISCV_DYNAMIC_TAG +#define RISCV_DYNAMIC_TAG(name, value) DYNAMIC_TAG(name, value) +#define RISCV_DYNAMIC_TAG_DEFINED +#endif + #ifndef DYNAMIC_TAG_MARKER #define DYNAMIC_TAG_MARKER(name, value) DYNAMIC_TAG(name, value) #define DYNAMIC_TAG_MARKER_DEFINED @@ -213,6 +218,9 @@ PPC_DYNAMIC_TAG(PPC_OPT, 0x70000001) // Has TLS optimization. PPC64_DYNAMIC_TAG(PPC64_GLINK, 0x70000000) // Address of 32 bytes before the // first glink lazy resolver stub. +// RISC-V specific dynamic array tags. +RISCV_DYNAMIC_TAG(RISCV_VARIANT_CC, 0x70000001) + // Sun machine-independent extensions. DYNAMIC_TAG(AUXILIARY, 0x7FFFFFFD) // Shared object to load before self DYNAMIC_TAG(USED, 0x7FFFFFFE) // Same as DT_NEEDED @@ -243,3 +251,7 @@ DYNAMIC_TAG(FILTER, 0x7FFFFFFF) // Shared object to get values from #undef PPC64_DYNAMIC_TAG #undef PPC64_DYNAMIC_TAG_DEFINED #endif +#ifdef RISCV_DYNAMIC_TAG_DEFINED +#undef RISCV_DYNAMIC_TAG +#undef RISCV_DYNAMIC_TAG_DEFINED +#endif diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h index 6148f968cdba..a270fd399aeb 100644 --- a/llvm/include/llvm/BinaryFormat/ELF.h +++ b/llvm/include/llvm/BinaryFormat/ELF.h @@ -660,6 +660,12 @@ enum { #include "ELFRelocs/RISCV.def" }; +enum { + // Symbol may follow different calling convention than the standard calling + // convention. 
+ STO_RISCV_VARIANT_CC = 0x80 +}; + // ELF Relocation types for S390/zSeries enum { #include "ELFRelocs/SystemZ.def" @@ -1596,6 +1602,16 @@ enum { NT_FREEBSD_PROCSTAT_AUXV = 16, }; +// OpenBSD core note types. +enum { + NT_OPENBSD_PROCINFO = 10, + NT_OPENBSD_AUXV = 11, + NT_OPENBSD_REGS = 20, + NT_OPENBSD_FPREGS = 21, + NT_OPENBSD_XFPREGS = 22, + NT_OPENBSD_WCOOKIE = 23, +}; + // AMDGPU-specific section indices. enum { SHN_AMDGPU_LDS = 0xff00, // Variable in LDS; symbol encoded like SHN_COMMON @@ -1618,6 +1634,13 @@ enum { NT_AMDGPU_METADATA = 32 }; +// LLVMOMPOFFLOAD specific notes. +enum : unsigned { + NT_LLVM_OPENMP_OFFLOAD_VERSION = 1, + NT_LLVM_OPENMP_OFFLOAD_PRODUCER = 2, + NT_LLVM_OPENMP_OFFLOAD_PRODUCER_VERSION = 3 +}; + enum { GNU_ABI_TAG_LINUX = 0, GNU_ABI_TAG_HURD = 1, diff --git a/llvm/include/llvm/BinaryFormat/ELFRelocs/RISCV.def b/llvm/include/llvm/BinaryFormat/ELFRelocs/RISCV.def index 9f2f0540bcbd..454450950444 100644 --- a/llvm/include/llvm/BinaryFormat/ELFRelocs/RISCV.def +++ b/llvm/include/llvm/BinaryFormat/ELFRelocs/RISCV.def @@ -46,10 +46,6 @@ ELF_RELOC(R_RISCV_ALIGN, 43) ELF_RELOC(R_RISCV_RVC_BRANCH, 44) ELF_RELOC(R_RISCV_RVC_JUMP, 45) ELF_RELOC(R_RISCV_RVC_LUI, 46) -ELF_RELOC(R_RISCV_GPREL_I, 47) -ELF_RELOC(R_RISCV_GPREL_S, 48) -ELF_RELOC(R_RISCV_TPREL_I, 49) -ELF_RELOC(R_RISCV_TPREL_S, 50) ELF_RELOC(R_RISCV_RELAX, 51) ELF_RELOC(R_RISCV_SUB6, 52) ELF_RELOC(R_RISCV_SET6, 53) diff --git a/llvm/include/llvm/BinaryFormat/MachO.def b/llvm/include/llvm/BinaryFormat/MachO.def index 76dcc58ba048..f68ecefa6c9e 100644 --- a/llvm/include/llvm/BinaryFormat/MachO.def +++ b/llvm/include/llvm/BinaryFormat/MachO.def @@ -74,6 +74,8 @@ HANDLE_LOAD_COMMAND(LC_VERSION_MIN_TVOS, 0x0000002Fu, version_min_command) HANDLE_LOAD_COMMAND(LC_VERSION_MIN_WATCHOS, 0x00000030u, version_min_command) HANDLE_LOAD_COMMAND(LC_NOTE, 0x00000031u, note_command) HANDLE_LOAD_COMMAND(LC_BUILD_VERSION, 0x00000032u, build_version_command) +HANDLE_LOAD_COMMAND(LC_DYLD_EXPORTS_TRIE, 0x80000033u, linkedit_data_command) +HANDLE_LOAD_COMMAND(LC_DYLD_CHAINED_FIXUPS, 0x80000034u, linkedit_data_command) #endif diff --git a/llvm/include/llvm/BinaryFormat/Wasm.h b/llvm/include/llvm/BinaryFormat/Wasm.h index c38e64928521..0bc8c4e167d8 100644 --- a/llvm/include/llvm/BinaryFormat/Wasm.h +++ b/llvm/include/llvm/BinaryFormat/Wasm.h @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // // This file defines manifest constants for the wasm object file format. -// See: https://github.com/WebAssembly/design/blob/master/BinaryEncoding.md +// See: https://github.com/WebAssembly/design/blob/main/BinaryEncoding.md // //===----------------------------------------------------------------------===// @@ -36,12 +36,25 @@ struct WasmObjectHeader { uint32_t Version; }; +struct WasmDylinkImportInfo { + StringRef Module; + StringRef Field; + uint32_t Flags; +}; + +struct WasmDylinkExportInfo { + StringRef Name; + uint32_t Flags; +}; + struct WasmDylinkInfo { uint32_t MemorySize; // Memory size in bytes uint32_t MemoryAlignment; // P2 alignment of memory uint32_t TableSize; // Table size in elements uint32_t TableAlignment; // P2 alignment of table std::vector Needed; // Shared library dependencies + std::vector ImportInfo; + std::vector ExportInfo; }; struct WasmProducerInfo { @@ -101,15 +114,9 @@ struct WasmGlobal { StringRef SymbolName; // from the "linking" section }; -struct WasmTagType { - // Kind of tag. Currently only WASM_TAG_ATTRIBUTE_EXCEPTION is possible. 
- uint8_t Attribute; - uint32_t SigIndex; -}; - struct WasmTag { uint32_t Index; - WasmTagType Type; + uint32_t SigIndex; StringRef SymbolName; // from the "linking" section }; @@ -122,7 +129,6 @@ struct WasmImport { WasmGlobalType Global; WasmTableType Table; WasmLimits Memory; - WasmTagType Tag; }; }; @@ -133,6 +139,7 @@ struct WasmLocalDecl { struct WasmFunction { uint32_t Index; + uint32_t SigIndex; std::vector Locals; ArrayRef Body; uint32_t CodeSectionOffset; @@ -284,11 +291,14 @@ enum : unsigned { // Opcodes used in synthetic functions. enum : unsigned { - WASM_OPCODE_IF = 0x04, - WASM_OPCODE_ELSE = 0x05, + WASM_OPCODE_BLOCK = 0x02, + WASM_OPCODE_BR = 0x0c, + WASM_OPCODE_BR_TABLE = 0x0e, + WASM_OPCODE_RETURN = 0x0f, WASM_OPCODE_DROP = 0x1a, WASM_OPCODE_MISC_PREFIX = 0xfc, WASM_OPCODE_MEMORY_INIT = 0x08, + WASM_OPCODE_MEMORY_FILL = 0x0b, WASM_OPCODE_DATA_DROP = 0x09, WASM_OPCODE_ATOMICS_PREFIX = 0xfe, WASM_OPCODE_ATOMIC_NOTIFY = 0x00, @@ -339,6 +349,14 @@ enum : unsigned { WASM_SYMBOL_TABLE = 0x8, }; +// Kind codes used in the custom "dylink" section +enum : unsigned { + WASM_DYLINK_MEM_INFO = 0x1, + WASM_DYLINK_NEEDED = 0x2, + WASM_DYLINK_EXPORT_INFO = 0x3, + WASM_DYLINK_IMPORT_INFO = 0x4, +}; + // Kind codes used in the custom "linking" section in the WASM_COMDAT_INFO enum : unsigned { WASM_COMDAT_DATA = 0x0, @@ -379,6 +397,7 @@ const unsigned WASM_SYMBOL_UNDEFINED = 0x10; const unsigned WASM_SYMBOL_EXPORTED = 0x20; const unsigned WASM_SYMBOL_EXPLICIT_NAME = 0x40; const unsigned WASM_SYMBOL_NO_STRIP = 0x80; +const unsigned WASM_SYMBOL_TLS = 0x100; #define WASM_RELOC(name, value) name = value, diff --git a/llvm/include/llvm/BinaryFormat/WasmTraits.h b/llvm/include/llvm/BinaryFormat/WasmTraits.h index 930ee690bcc0..bef9dd3291ca 100644 --- a/llvm/include/llvm/BinaryFormat/WasmTraits.h +++ b/llvm/include/llvm/BinaryFormat/WasmTraits.h @@ -18,10 +18,8 @@ namespace llvm { -template struct DenseMapInfo; - // Traits for using WasmSignature in a DenseMap. 
-template <> struct DenseMapInfo { +template <> struct DenseMapInfo { static wasm::WasmSignature getEmptyKey() { wasm::WasmSignature Sig; Sig.State = wasm::WasmSignature::Empty; @@ -47,7 +45,7 @@ template <> struct DenseMapInfo { }; // Traits for using WasmGlobalType in a DenseMap -template <> struct DenseMapInfo { +template <> struct DenseMapInfo { static wasm::WasmGlobalType getEmptyKey() { return wasm::WasmGlobalType{1, true}; } @@ -64,7 +62,7 @@ template <> struct DenseMapInfo { }; // Traits for using WasmLimits in a DenseMap -template <> struct DenseMapInfo { +template <> struct DenseMapInfo { static wasm::WasmLimits getEmptyKey() { return wasm::WasmLimits{0xff, 0xff, 0xff}; } @@ -86,19 +84,19 @@ template <> struct DenseMapInfo { }; // Traits for using WasmTableType in a DenseMap -template <> struct DenseMapInfo { +template <> struct DenseMapInfo { static wasm::WasmTableType getEmptyKey() { - return wasm::WasmTableType{0, - DenseMapInfo::getEmptyKey()}; + return wasm::WasmTableType{ + 0, DenseMapInfo::getEmptyKey()}; } static wasm::WasmTableType getTombstoneKey() { return wasm::WasmTableType{ - 1, DenseMapInfo::getTombstoneKey()}; + 1, DenseMapInfo::getTombstoneKey()}; } static unsigned getHashValue(const wasm::WasmTableType &TableType) { return hash_combine( TableType.ElemType, - DenseMapInfo::getHashValue(TableType.Limits)); + DenseMapInfo::getHashValue(TableType.Limits)); } static bool isEqual(const wasm::WasmTableType &LHS, const wasm::WasmTableType &RHS) { diff --git a/llvm/include/llvm/BinaryFormat/XCOFF.h b/llvm/include/llvm/BinaryFormat/XCOFF.h index 8a42d26f3f4a..cffd8618f1e3 100644 --- a/llvm/include/llvm/BinaryFormat/XCOFF.h +++ b/llvm/include/llvm/BinaryFormat/XCOFF.h @@ -28,9 +28,14 @@ namespace XCOFF { constexpr size_t FileNamePadSize = 6; constexpr size_t NameSize = 8; constexpr size_t FileHeaderSize32 = 20; +constexpr size_t FileHeaderSize64 = 24; +constexpr size_t AuxFileHeaderSize32 = 72; +constexpr size_t AuxFileHeaderSize64 = 110; constexpr size_t SectionHeaderSize32 = 40; +constexpr size_t SectionHeaderSize64 = 72; constexpr size_t SymbolTableEntrySize = 18; constexpr size_t RelocationSerializationSize32 = 10; +constexpr size_t RelocationSerializationSize64 = 14; constexpr uint16_t RelocOverflow = 65535; constexpr uint8_t AllocRegNo = 31; @@ -38,6 +43,17 @@ enum ReservedSectionNum : int16_t { N_DEBUG = -2, N_ABS = -1, N_UNDEF = 0 }; enum MagicNumber : uint16_t { XCOFF32 = 0x01DF, XCOFF64 = 0x01F7 }; +// This field only exists in the XCOFF64 definition. +enum AuxHeaderFlags64 : uint16_t { + SHR_SYMTAB = 0x8000, ///< At exec time, create shared symbol table for program + ///< (main program only). + FORK_POLICY = 0x4000, ///< Forktree policy specified (main program only). + FORK_COR = 0x2000 ///< If _AOUT_FORK_POLICY is set, specify copy-on-reference + ///< if this bit is set. Specify copy-on- write otherwise. + ///< If _AOUT_FORK_POLICY is 0, this bit is reserved for + ///< future use and should be set to 0. +}; + // x_smclas field of x_csect from system header: /usr/include/syms.h /// Storage Mapping Class definitions. enum StorageMappingClass : uint8_t { diff --git a/llvm/include/llvm/Bitcode/BitcodeAnalyzer.h b/llvm/include/llvm/Bitcode/BitcodeAnalyzer.h index de828be3bf1b..f6fc284da33f 100644 --- a/llvm/include/llvm/Bitcode/BitcodeAnalyzer.h +++ b/llvm/include/llvm/Bitcode/BitcodeAnalyzer.h @@ -42,6 +42,8 @@ struct BCDumpOptions { bool Symbolic = false; /// Print binary blobs using hex escapes. bool ShowBinaryBlobs = false; + /// Print BLOCKINFO block details. 
+ bool DumpBlockinfo = false; BCDumpOptions(raw_ostream &OS) : OS(OS) {} }; diff --git a/llvm/include/llvm/Bitcode/BitcodeCommon.h b/llvm/include/llvm/Bitcode/BitcodeCommon.h index 6a3e74550bc4..22d1872fe49c 100644 --- a/llvm/include/llvm/Bitcode/BitcodeCommon.h +++ b/llvm/include/llvm/Bitcode/BitcodeCommon.h @@ -19,10 +19,14 @@ namespace llvm { struct AllocaPackedValues { - using Align = Bitfield::Element; - using UsedWithInAlloca = Bitfield::Element; + // We increased the number of bits needed to represent alignment to be more + // than 5, but to preserve backward compatibility we store the upper bits + // separately. + using AlignLower = Bitfield::Element; + using UsedWithInAlloca = Bitfield::Element; using ExplicitType = Bitfield::Element; using SwiftError = Bitfield::Element; + using AlignUpper = Bitfield::Element; }; } // namespace llvm diff --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h index 28870afb2fcb..04eb2739cbd5 100644 --- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h +++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h @@ -671,6 +671,7 @@ enum AttributeKindCodes { ATTR_KIND_SWIFT_ASYNC = 75, ATTR_KIND_NO_SANITIZE_COVERAGE = 76, ATTR_KIND_ELEMENTTYPE = 77, + ATTR_KIND_DISABLE_SANITIZER_INSTRUMENTATION = 78, }; enum ComdatSelectionKindCodes { diff --git a/llvm/include/llvm/CodeGen/Analysis.h b/llvm/include/llvm/CodeGen/Analysis.h index bdfb416d9bd9..60442326d6c7 100644 --- a/llvm/include/llvm/CodeGen/Analysis.h +++ b/llvm/include/llvm/CodeGen/Analysis.h @@ -104,9 +104,12 @@ ISD::CondCode getFCmpCodeWithoutNaN(ISD::CondCode CC); /// getICmpCondCode - Return the ISD condition code corresponding to /// the given LLVM IR integer condition code. -/// ISD::CondCode getICmpCondCode(ICmpInst::Predicate Pred); +/// getICmpCondCode - Return the LLVM IR integer condition code +/// corresponding to the given ISD integer condition code. +ICmpInst::Predicate getICmpCondCode(ISD::CondCode Pred); + /// Test if the given instruction is in a position to be optimized /// with a tail-call. This roughly means that it's in a block with /// a return and there's nothing that needs to be scheduled diff --git a/llvm/include/llvm/CodeGen/AsmPrinter.h b/llvm/include/llvm/CodeGen/AsmPrinter.h index 5dea86e67d64..d7d3692877de 100644 --- a/llvm/include/llvm/CodeGen/AsmPrinter.h +++ b/llvm/include/llvm/CodeGen/AsmPrinter.h @@ -41,7 +41,6 @@ class DIEAbbrev; class DwarfDebug; class GCMetadataPrinter; class GCStrategy; -class GlobalIndirectSymbol; class GlobalObject; class GlobalValue; class GlobalVariable; @@ -708,7 +707,7 @@ public: /// ${:comment}. Targets can override this to add support for their own /// strange codes. virtual void PrintSpecial(const MachineInstr *MI, raw_ostream &OS, - const char *Code) const; + StringRef Code) const; /// Print the MachineOperand as a symbol. Targets with complex handling of /// symbol references should override the base implementation. @@ -795,8 +794,8 @@ private: void emitModuleCommandLines(Module &M); GCMetadataPrinter *GetOrCreateGCPrinter(GCStrategy &S); - /// Emit GlobalAlias or GlobalIFunc. - void emitGlobalIndirectSymbol(Module &M, const GlobalIndirectSymbol &GIS); + void emitGlobalAlias(Module &M, const GlobalAlias &GA); + void emitGlobalIFunc(Module &M, const GlobalIFunc &GI); /// This method decides whether the specified basic block requires a label. 
bool shouldEmitLabelForBasicBlock(const MachineBasicBlock &MBB) const; diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index e3b834ec42c3..324b7dcfb3ac 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -22,6 +22,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/TargetTransformInfoImpl.h" #include "llvm/CodeGen/ISDOpcodes.h" @@ -282,6 +283,11 @@ public: return getTLI()->getTargetMachine().getAssumedAddrSpace(V); } + std::pair + getPredicatedAddrSpace(const Value *V) const { + return getTLI()->getTargetMachine().getPredicatedAddrSpace(V); + } + Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, Value *NewV) const { return nullptr; @@ -363,8 +369,9 @@ public: } InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, - ArrayRef Operands) { - return BaseT::getGEPCost(PointeeType, Ptr, Operands); + ArrayRef Operands, + TTI::TargetCostKind CostKind) { + return BaseT::getGEPCost(PointeeType, Ptr, Operands, CostKind); } unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, @@ -484,7 +491,8 @@ public: int getInlinerVectorBonusPercent() { return 150; } void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, - TTI::UnrollingPreferences &UP) { + TTI::UnrollingPreferences &UP, + OptimizationRemarkEmitter *ORE) { // This unrolling functionality is target independent, but to provide some // motivation for its intended use, for x86: @@ -526,6 +534,15 @@ public: continue; } + if (ORE) { + ORE->emit([&]() { + return OptimizationRemark("TTI", "DontUnroll", L->getStartLoc(), + L->getHeader()) + << "advising against unrolling the loop because it " + "contains a " + << ore::NV("Call", &I); + }); + } return; } } @@ -653,6 +670,7 @@ public: } Optional getMaxVScale() const { return None; } + Optional getVScaleForTuning() const { return None; } /// Estimate the overhead of scalarizing an instruction. 
Insert and Extract /// are set if the demanded result elements need to be inserted and/or @@ -686,7 +704,7 @@ public: bool Extract) { auto *Ty = cast(InTy); - APInt DemandedElts = APInt::getAllOnesValue(Ty->getNumElements()); + APInt DemandedElts = APInt::getAllOnes(Ty->getNumElements()); return thisT()->getScalarizationOverhead(Ty, DemandedElts, Insert, Extract); } @@ -737,8 +755,7 @@ public: unsigned getMaxInterleaveFactor(unsigned VF) { return 1; } InstructionCost getArithmeticInstrCost( - unsigned Opcode, Type *Ty, - TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, + unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, @@ -1102,6 +1119,39 @@ public: return LT.first; } + InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, + int VF, + const APInt &DemandedDstElts, + TTI::TargetCostKind CostKind) { + assert(DemandedDstElts.getBitWidth() == (unsigned)VF * ReplicationFactor && + "Unexpected size of DemandedDstElts."); + + InstructionCost Cost; + + auto *SrcVT = FixedVectorType::get(EltTy, VF); + auto *ReplicatedVT = FixedVectorType::get(EltTy, VF * ReplicationFactor); + + // The Mask shuffling cost is extract all the elements of the Mask + // and insert each of them Factor times into the wide vector: + // + // E.g. an interleaved group with factor 3: + // %mask = icmp ult <8 x i32> %vec1, %vec2 + // %interleaved.mask = shufflevector <8 x i1> %mask, <8 x i1> undef, + // <24 x i32> <0,0,0,1,1,1,2,2,2,3,3,3,4,4,4,5,5,5,6,6,6,7,7,7> + // The cost is estimated as extract all mask elements from the <8xi1> mask + // vector and insert them factor times into the <24xi1> shuffled mask + // vector. + APInt DemandedSrcElts = APIntOps::ScaleBitMask(DemandedDstElts, VF); + Cost += thisT()->getScalarizationOverhead(SrcVT, DemandedSrcElts, + /*Insert*/ false, + /*Extract*/ true); + Cost += + thisT()->getScalarizationOverhead(ReplicatedVT, DemandedDstElts, + /*Insert*/ true, /*Extract*/ false); + + return Cost; + } + InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, @@ -1201,9 +1251,9 @@ public: // used (those corresponding to elements [0:1] and [8:9] of the unlegalized // type). The other loads are unused. // - // We only scale the cost of loads since interleaved store groups aren't - // allowed to have gaps. - if (Opcode == Instruction::Load && VecTySize > VecTyLTSize) { + // TODO: Note that legalization can turn masked loads/stores into unmasked + // (legalized) loads/stores. This can be reflected in the cost. + if (Cost.isValid() && VecTySize > VecTyLTSize) { // The number of loads of a legal type it will take to represent a load // of the unlegalized vector type. unsigned NumLegalInsts = divideCeil(VecTySize, VecTyLTSize); @@ -1220,10 +1270,24 @@ public: // Scale the cost of the load by the fraction of legal instructions that // will be used. - Cost *= UsedInsts.count() / NumLegalInsts; + Cost = divideCeil(UsedInsts.count() * Cost.getValue().getValue(), + NumLegalInsts); } // Then plus the cost of interleave operation. 
+ assert(Indices.size() <= Factor && + "Interleaved memory op has too many members"); + + const APInt DemandedAllSubElts = APInt::getAllOnes(NumSubElts); + const APInt DemandedAllResultElts = APInt::getAllOnes(NumElts); + + APInt DemandedLoadStoreElts = APInt::getZero(NumElts); + for (unsigned Index : Indices) { + assert(Index < Factor && "Invalid index for interleaved memory op"); + for (unsigned Elm = 0; Elm < NumSubElts; Elm++) + DemandedLoadStoreElts.setBit(Index + Elm * Factor); + } + if (Opcode == Instruction::Load) { // The interleave cost is similar to extract sub vectors' elements // from the wide vector, and insert them into sub vectors. @@ -1233,79 +1297,56 @@ public: // %v0 = shuffle %vec, undef, <0, 2, 4, 6> ; Index 0 // The cost is estimated as extract elements at 0, 2, 4, 6 from the // <8 x i32> vector and insert them into a <4 x i32> vector. - - assert(Indices.size() <= Factor && - "Interleaved memory op has too many members"); - - for (unsigned Index : Indices) { - assert(Index < Factor && "Invalid index for interleaved memory op"); - - // Extract elements from loaded vector for each sub vector. - for (unsigned i = 0; i < NumSubElts; i++) - Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VT, - Index + i * Factor); - } - - InstructionCost InsSubCost = 0; - for (unsigned i = 0; i < NumSubElts; i++) - InsSubCost += - thisT()->getVectorInstrCost(Instruction::InsertElement, SubVT, i); - + InstructionCost InsSubCost = + thisT()->getScalarizationOverhead(SubVT, DemandedAllSubElts, + /*Insert*/ true, /*Extract*/ false); Cost += Indices.size() * InsSubCost; + Cost += + thisT()->getScalarizationOverhead(VT, DemandedLoadStoreElts, + /*Insert*/ false, /*Extract*/ true); } else { - // The interleave cost is extract all elements from sub vectors, and + // The interleave cost is extract elements from sub vectors, and // insert them into the wide vector. // - // E.g. An interleaved store of factor 2: - // %v0_v1 = shuffle %v0, %v1, <0, 4, 1, 5, 2, 6, 3, 7> - // store <8 x i32> %interleaved.vec, <8 x i32>* %ptr - // The cost is estimated as extract all elements from both <4 x i32> - // vectors and insert into the <8 x i32> vector. - - InstructionCost ExtSubCost = 0; - for (unsigned i = 0; i < NumSubElts; i++) - ExtSubCost += - thisT()->getVectorInstrCost(Instruction::ExtractElement, SubVT, i); - Cost += ExtSubCost * Factor; - - for (unsigned i = 0; i < NumElts; i++) - Cost += static_cast(this) - ->getVectorInstrCost(Instruction::InsertElement, VT, i); + // E.g. An interleaved store of factor 3 with 2 members at indices 0,1: + // (using VF=4): + // %v0_v1 = shuffle %v0, %v1, <0,4,undef,1,5,undef,2,6,undef,3,7,undef> + // %gaps.mask = + // call llvm.masked.store <12 x i32> %v0_v1, <12 x i32>* %ptr, + // i32 Align, <12 x i1> %gaps.mask + // The cost is estimated as extract all elements (of actual members, + // excluding gaps) from both <4 x i32> vectors and insert into the <12 x + // i32> vector. 
+ InstructionCost ExtSubCost = + thisT()->getScalarizationOverhead(SubVT, DemandedAllSubElts, + /*Insert*/ false, /*Extract*/ true); + Cost += ExtSubCost * Indices.size(); + Cost += thisT()->getScalarizationOverhead(VT, DemandedLoadStoreElts, + /*Insert*/ true, + /*Extract*/ false); } if (!UseMaskForCond) return Cost; Type *I8Type = Type::getInt8Ty(VT->getContext()); - auto *MaskVT = FixedVectorType::get(I8Type, NumElts); - SubVT = FixedVectorType::get(I8Type, NumSubElts); - - // The Mask shuffling cost is extract all the elements of the Mask - // and insert each of them Factor times into the wide vector: - // - // E.g. an interleaved group with factor 3: - // %mask = icmp ult <8 x i32> %vec1, %vec2 - // %interleaved.mask = shufflevector <8 x i1> %mask, <8 x i1> undef, - // <24 x i32> <0,0,0,1,1,1,2,2,2,3,3,3,4,4,4,5,5,5,6,6,6,7,7,7> - // The cost is estimated as extract all mask elements from the <8xi1> mask - // vector and insert them factor times into the <24xi1> shuffled mask - // vector. - for (unsigned i = 0; i < NumSubElts; i++) - Cost += - thisT()->getVectorInstrCost(Instruction::ExtractElement, SubVT, i); - for (unsigned i = 0; i < NumElts; i++) - Cost += - thisT()->getVectorInstrCost(Instruction::InsertElement, MaskVT, i); + Cost += thisT()->getReplicationShuffleCost( + I8Type, Factor, NumSubElts, + UseMaskForGaps ? DemandedLoadStoreElts : DemandedAllResultElts, + CostKind); // The Gaps mask is invariant and created outside the loop, therefore the // cost of creating it is not accounted for here. However if we have both // a MaskForGaps and some other mask that guards the execution of the // memory access, we need to account for the cost of And-ing the two masks // inside the loop. - if (UseMaskForGaps) + if (UseMaskForGaps) { + auto *MaskVT = FixedVectorType::get(I8Type, NumElts); Cost += thisT()->getArithmeticInstrCost(BinaryOperator::And, MaskVT, CostKind); + } return Cost; } @@ -1460,10 +1501,10 @@ public: Type *CondTy = RetTy->getWithNewBitWidth(1); Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy, - CmpInst::BAD_ICMP_PREDICATE, CostKind); + CmpInst::ICMP_EQ, CostKind); Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy, - CmpInst::BAD_ICMP_PREDICATE, CostKind); + CmpInst::ICMP_EQ, CostKind); } return Cost; } @@ -1689,26 +1730,34 @@ public: return thisT()->getMinMaxReductionCost( VecOpTy, cast(CmpInst::makeCmpResultType(VecOpTy)), /*IsUnsigned=*/true, CostKind); - case Intrinsic::abs: + case Intrinsic::abs: { + // abs(X) = select(icmp(X,0),X,sub(0,X)) + Type *CondTy = RetTy->getWithNewBitWidth(1); + CmpInst::Predicate Pred = CmpInst::ICMP_SGT; + InstructionCost Cost = 0; + Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy, + Pred, CostKind); + Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy, + Pred, CostKind); + // TODO: Should we add an OperandValueProperties::OP_Zero property? + Cost += thisT()->getArithmeticInstrCost( + BinaryOperator::Sub, RetTy, CostKind, TTI::OK_UniformConstantValue); + return Cost; + } case Intrinsic::smax: case Intrinsic::smin: case Intrinsic::umax: case Intrinsic::umin: { - // abs(X) = select(icmp(X,0),X,sub(0,X)) // minmax(X,Y) = select(icmp(X,Y),X,Y) Type *CondTy = RetTy->getWithNewBitWidth(1); + bool IsUnsigned = IID == Intrinsic::umax || IID == Intrinsic::umin; + CmpInst::Predicate Pred = + IsUnsigned ? CmpInst::ICMP_UGT : CmpInst::ICMP_SGT; InstructionCost Cost = 0; - // TODO: Ideally getCmpSelInstrCost would accept an icmp condition code. 
- Cost += - thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy, - CmpInst::BAD_ICMP_PREDICATE, CostKind); - Cost += - thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy, - CmpInst::BAD_ICMP_PREDICATE, CostKind); - // TODO: Should we add an OperandValueProperties::OP_Zero property? - if (IID == Intrinsic::abs) - Cost += thisT()->getArithmeticInstrCost( - BinaryOperator::Sub, RetTy, CostKind, TTI::OK_UniformConstantValue); + Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy, + Pred, CostKind); + Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy, + Pred, CostKind); return Cost; } case Intrinsic::sadd_sat: @@ -1719,6 +1768,7 @@ public: Intrinsic::ID OverflowOp = IID == Intrinsic::sadd_sat ? Intrinsic::sadd_with_overflow : Intrinsic::ssub_with_overflow; + CmpInst::Predicate Pred = CmpInst::ICMP_SGT; // SatMax -> Overflow && SumDiff < 0 // SatMin -> Overflow && SumDiff >= 0 @@ -1726,12 +1776,10 @@ public: IntrinsicCostAttributes Attrs(OverflowOp, OpTy, {RetTy, RetTy}, FMF, nullptr, ScalarizationCostPassed); Cost += thisT()->getIntrinsicInstrCost(Attrs, CostKind); - Cost += - thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy, - CmpInst::BAD_ICMP_PREDICATE, CostKind); - Cost += 2 * thisT()->getCmpSelInstrCost( - BinaryOperator::Select, RetTy, CondTy, - CmpInst::BAD_ICMP_PREDICATE, CostKind); + Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy, + Pred, CostKind); + Cost += 2 * thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, + CondTy, Pred, CostKind); return Cost; } case Intrinsic::uadd_sat: @@ -1784,23 +1832,16 @@ public: ? BinaryOperator::Add : BinaryOperator::Sub; - // LHSSign -> LHS >= 0 - // RHSSign -> RHS >= 0 - // SumSign -> Sum >= 0 - // // Add: - // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign) + // Overflow -> (Result < LHS) ^ (RHS < 0) // Sub: - // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign) + // Overflow -> (Result < LHS) ^ (RHS > 0) InstructionCost Cost = 0; Cost += thisT()->getArithmeticInstrCost(Opcode, SumTy, CostKind); - Cost += 3 * thisT()->getCmpSelInstrCost( - Instruction::ICmp, SumTy, OverflowTy, - CmpInst::BAD_ICMP_PREDICATE, CostKind); Cost += 2 * thisT()->getCmpSelInstrCost( - Instruction::Select, OverflowTy, OverflowTy, - CmpInst::BAD_ICMP_PREDICATE, CostKind); - Cost += thisT()->getArithmeticInstrCost(BinaryOperator::And, OverflowTy, + Instruction::ICmp, SumTy, OverflowTy, + CmpInst::ICMP_SGT, CostKind); + Cost += thisT()->getArithmeticInstrCost(BinaryOperator::Xor, OverflowTy, CostKind); return Cost; } @@ -1811,12 +1852,15 @@ public: unsigned Opcode = IID == Intrinsic::uadd_with_overflow ? BinaryOperator::Add : BinaryOperator::Sub; + CmpInst::Predicate Pred = IID == Intrinsic::uadd_with_overflow + ? CmpInst::ICMP_ULT + : CmpInst::ICMP_UGT; InstructionCost Cost = 0; Cost += thisT()->getArithmeticInstrCost(Opcode, SumTy, CostKind); Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy, OverflowTy, - CmpInst::BAD_ICMP_PREDICATE, CostKind); + Pred, CostKind); return Cost; } case Intrinsic::smul_with_overflow: @@ -1825,9 +1869,9 @@ public: Type *OverflowTy = RetTy->getContainedType(1); unsigned ExtSize = MulTy->getScalarSizeInBits() * 2; Type *ExtTy = MulTy->getWithNewBitWidth(ExtSize); + bool IsSigned = IID == Intrinsic::smul_with_overflow; - unsigned ExtOp = - IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt; + unsigned ExtOp = IsSigned ? 
Instruction::SExt : Instruction::ZExt; TTI::CastContextHint CCH = TTI::CastContextHint::None; InstructionCost Cost = 0; @@ -1836,18 +1880,17 @@ public: thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind); Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy, CCH, CostKind); - Cost += thisT()->getArithmeticInstrCost(Instruction::LShr, MulTy, + Cost += thisT()->getArithmeticInstrCost(Instruction::LShr, ExtTy, CostKind, TTI::OK_AnyValue, TTI::OK_UniformConstantValue); - if (IID == Intrinsic::smul_with_overflow) + if (IsSigned) Cost += thisT()->getArithmeticInstrCost(Instruction::AShr, MulTy, CostKind, TTI::OK_AnyValue, TTI::OK_UniformConstantValue); - Cost += - thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, MulTy, OverflowTy, - CmpInst::BAD_ICMP_PREDICATE, CostKind); + Cost += thisT()->getCmpSelInstrCost( + BinaryOperator::ICmp, MulTy, OverflowTy, CmpInst::ICMP_NE, CostKind); return Cost; } case Intrinsic::ctpop: @@ -1974,16 +2017,16 @@ public: /// \param RetTy Return value types. /// \param Tys Argument types. /// \returns The cost of Call instruction. - InstructionCost - getCallInstrCost(Function *F, Type *RetTy, ArrayRef Tys, - TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) { + InstructionCost getCallInstrCost(Function *F, Type *RetTy, + ArrayRef Tys, + TTI::TargetCostKind CostKind) { return 10; } unsigned getNumberOfParts(Type *Tp) { std::pair LT = getTLI()->getTypeLegalizationCost(DL, Tp); - return *LT.first.getValue(); + return LT.first.isValid() ? *LT.first.getValue() : 0; } InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *, @@ -2060,7 +2103,8 @@ public: // By default reductions need one shuffle per reduction level. ShuffleCost += NumReduxLevels * thisT()->getShuffleCost( TTI::SK_PermuteSingleSrc, Ty, None, 0, Ty); - ArithCost += NumReduxLevels * thisT()->getArithmeticInstrCost(Opcode, Ty); + ArithCost += + NumReduxLevels * thisT()->getArithmeticInstrCost(Opcode, Ty, CostKind); return ShuffleCost + ArithCost + thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty, 0); } diff --git a/llvm/include/llvm/CodeGen/CodeGenCommonISel.h b/llvm/include/llvm/CodeGen/CodeGenCommonISel.h new file mode 100644 index 000000000000..270f935b6738 --- /dev/null +++ b/llvm/include/llvm/CodeGen/CodeGenCommonISel.h @@ -0,0 +1,219 @@ +//===- CodeGenCommonISel.h - Common code between ISels ---------*- C++ -*--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares common utilities that are shared between SelectionDAG and +// GlobalISel frameworks. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_CODEGENCOMMONISEL_H +#define LLVM_CODEGEN_CODEGENCOMMONISEL_H + +#include "llvm/CodeGen/MachineBasicBlock.h" +#include +namespace llvm { + +class BasicBlock; +class MachineBasicBlock; +/// Encapsulates all of the information needed to generate a stack protector +/// check, and signals to isel when initialized that one needs to be generated. +/// +/// *NOTE* The following is a high level documentation of SelectionDAG Stack +/// Protector Generation. This is now also ported be shared with GlobalISel, +/// but without any significant changes. 
+/// +/// High Level Overview of ISel Stack Protector Generation: +/// +/// Previously, the "stack protector" IR pass handled stack protector +/// generation. This necessitated splitting basic blocks at the IR level to +/// create the success/failure basic blocks in the tail of the basic block in +/// question. As a result of this, calls that would have qualified for the +/// sibling call optimization were no longer eligible for optimization since +/// said calls were no longer right in the "tail position" (i.e. the immediate +/// predecessor of a ReturnInst instruction). +/// +/// Since the sibling call optimization causes the callee to reuse the caller's +/// stack, if we could delay the generation of the stack protector check until +/// later in CodeGen after the sibling call decision was made, we get both the +/// tail call optimization and the stack protector check! +/// +/// A few goals in solving this problem were: +/// +/// 1. Preserve the architecture independence of stack protector generation. +/// +/// 2. Preserve the normal IR level stack protector check for platforms like +/// OpenBSD for which we support platform-specific stack protector +/// generation. +/// +/// The main problem that guided the present solution is that one can not +/// solve this problem in an architecture independent manner at the IR level +/// only. This is because: +/// +/// 1. The decision on whether or not to perform a sibling call on certain +/// platforms (for instance i386) requires lower level information +/// related to available registers that can not be known at the IR level. +/// +/// 2. Even if the previous point were not true, the decision on whether to +/// perform a tail call is done in LowerCallTo in SelectionDAG (or +/// CallLowering in GlobalISel) which occurs after the Stack Protector +/// Pass. As a result, one would need to put the relevant callinst into the +/// stack protector check success basic block (where the return inst is +/// placed) and then move it back later at ISel/MI time before the +/// stack protector check if the tail call optimization failed. The MI +/// level option was nixed immediately since it would require +/// platform-specific pattern matching. The ISel level option was +/// nixed because SelectionDAG only processes one IR level basic block at a +/// time implying one could not create a DAG Combine to move the callinst. +/// +/// To get around this problem: +/// +/// 1. SelectionDAG can only process one block at a time, we can generate +/// multiple machine basic blocks for one IR level basic block. +/// This is how we handle bit tests and switches. +/// +/// 2. At the MI level, tail calls are represented via a special return +/// MIInst called "tcreturn". Thus if we know the basic block in which we +/// wish to insert the stack protector check, we get the correct behavior +/// by always inserting the stack protector check right before the return +/// statement. This is a "magical transformation" since no matter where +/// the stack protector check intrinsic is, we always insert the stack +/// protector check code at the end of the BB. +/// +/// Given the aforementioned constraints, the following solution was devised: +/// +/// 1. On platforms that do not support ISel stack protector check +/// generation, allow for the normal IR level stack protector check +/// generation to continue. +/// +/// 2. On platforms that do support ISel stack protector check +/// generation: +/// +/// a. 
Use the IR level stack protector pass to decide if a stack +/// protector is required/which BB we insert the stack protector check +/// in by reusing the logic already therein. +/// +/// b. After we finish selecting the basic block, we produce the validation +/// code with one of these techniques: +/// 1) with a call to a guard check function +/// 2) with inlined instrumentation +/// +/// 1) We insert a call to the check function before the terminator. +/// +/// 2) We first find a splice point in the parent basic block +/// before the terminator and then splice the terminator of said basic +/// block into the success basic block. Then we code-gen a new tail for +/// the parent basic block consisting of the two loads, the comparison, +/// and finally two branches to the success/failure basic blocks. We +/// conclude by code-gening the failure basic block if we have not +/// code-gened it already (all stack protector checks we generate in +/// the same function, use the same failure basic block). +class StackProtectorDescriptor { +public: + StackProtectorDescriptor() = default; + + /// Returns true if all fields of the stack protector descriptor are + /// initialized implying that we should/are ready to emit a stack protector. + bool shouldEmitStackProtector() const { + return ParentMBB && SuccessMBB && FailureMBB; + } + + bool shouldEmitFunctionBasedCheckStackProtector() const { + return ParentMBB && !SuccessMBB && !FailureMBB; + } + + /// Initialize the stack protector descriptor structure for a new basic + /// block. + void initialize(const BasicBlock *BB, MachineBasicBlock *MBB, + bool FunctionBasedInstrumentation) { + // Make sure we are not initialized yet. + assert(!shouldEmitStackProtector() && "Stack Protector Descriptor is " + "already initialized!"); + ParentMBB = MBB; + if (!FunctionBasedInstrumentation) { + SuccessMBB = addSuccessorMBB(BB, MBB, /* IsLikely */ true); + FailureMBB = addSuccessorMBB(BB, MBB, /* IsLikely */ false, FailureMBB); + } + } + + /// Reset state that changes when we handle different basic blocks. + /// + /// This currently includes: + /// + /// 1. The specific basic block we are generating a + /// stack protector for (ParentMBB). + /// + /// 2. The successor machine basic block that will contain the tail of + /// parent mbb after we create the stack protector check (SuccessMBB). This + /// BB is visited only on stack protector check success. + void resetPerBBState() { + ParentMBB = nullptr; + SuccessMBB = nullptr; + } + + /// Reset state that only changes when we switch functions. + /// + /// This currently includes: + /// + /// 1. FailureMBB since we reuse the failure code path for all stack + /// protector checks created in an individual function. + /// + /// 2.The guard variable since the guard variable we are checking against is + /// always the same. + void resetPerFunctionState() { FailureMBB = nullptr; } + + MachineBasicBlock *getParentMBB() { return ParentMBB; } + MachineBasicBlock *getSuccessMBB() { return SuccessMBB; } + MachineBasicBlock *getFailureMBB() { return FailureMBB; } + +private: + /// The basic block for which we are generating the stack protector. + /// + /// As a result of stack protector generation, we will splice the + /// terminators of this basic block into the successor mbb SuccessMBB and + /// replace it with a compare/branch to the successor mbbs + /// SuccessMBB/FailureMBB depending on whether or not the stack protector + /// was violated. 
+ MachineBasicBlock *ParentMBB = nullptr; + + /// A basic block visited on stack protector check success that contains the + /// terminators of ParentMBB. + MachineBasicBlock *SuccessMBB = nullptr; + + /// This basic block visited on stack protector check failure that will + /// contain a call to __stack_chk_fail(). + MachineBasicBlock *FailureMBB = nullptr; + + /// Add a successor machine basic block to ParentMBB. If the successor mbb + /// has not been created yet (i.e. if SuccMBB = 0), then the machine basic + /// block will be created. Assign a large weight if IsLikely is true. + MachineBasicBlock *addSuccessorMBB(const BasicBlock *BB, + MachineBasicBlock *ParentMBB, + bool IsLikely, + MachineBasicBlock *SuccMBB = nullptr); +}; + +/// Find the split point at which to splice the end of BB into its success stack +/// protector check machine basic block. +/// +/// On many platforms, due to ABI constraints, terminators, even before register +/// allocation, use physical registers. This creates an issue for us since +/// physical registers at this point can not travel across basic +/// blocks. Luckily, selectiondag always moves physical registers into vregs +/// when they enter functions and moves them through a sequence of copies back +/// into the physical registers right before the terminator creating a +/// ``Terminator Sequence''. This function is searching for the beginning of the +/// terminator sequence so that we can ensure that we splice off not just the +/// terminator, but additionally the copies that move the vregs into the +/// physical registers. +MachineBasicBlock::iterator +findSplitPointForStackProtector(MachineBasicBlock *BB, + const TargetInstrInfo &TII); + +} // namespace llvm + +#endif // LLVM_CODEGEN_CODEGENCOMMONISEL_H diff --git a/llvm/include/llvm/CodeGen/CommandFlags.h b/llvm/include/llvm/CodeGen/CommandFlags.h index 5a4351756297..ed3cd54df272 100644 --- a/llvm/include/llvm/CodeGen/CommandFlags.h +++ b/llvm/include/llvm/CodeGen/CommandFlags.h @@ -48,7 +48,6 @@ Optional getExplicitCodeModel(); llvm::ExceptionHandling getExceptionModel(); -CodeGenFileType getFileType(); Optional getExplicitFileType(); CodeGenFileType getFileType(); @@ -74,6 +73,8 @@ llvm::FloatABI::ABIType getFloatABIForCalls(); llvm::FPOpFusion::FPOpFusionMode getFuseFPOps(); +SwiftAsyncFramePointerMode getSwiftAsyncFramePointer(); + bool getDontPlaceZerosInBSS(); bool getEnableGuaranteedTailCallOpt(); @@ -128,8 +129,6 @@ bool getEnableMachineFunctionSplitter(); bool getEnableDebugEntryValues(); -bool getPseudoProbeForProfiling(); - bool getValueTrackingVariableLocations(); bool getForceDwarfFrameSection(); @@ -138,6 +137,8 @@ bool getXRayOmitFunctionIndex(); bool getDebugStrictDwarf(); +unsigned getAlignLoops(); + /// Create this object with static storage to register codegen-related command /// line options. 
struct RegisterCodeGenFlags { diff --git a/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h b/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h index b6bde0249f88..524730d53694 100644 --- a/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h +++ b/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h @@ -17,7 +17,6 @@ #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/IndexedMap.h" -#include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/ISDOpcodes.h" diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h index 6bdaddd9c6f5..9c878d4b087b 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h @@ -116,6 +116,9 @@ public: /// vreg that the swifterror should be copied into after the call. Register SwiftErrorVReg; + /// Original IR callsite corresponding to this call, if available. + const CallBase *CB = nullptr; + MDNode *KnownCallees = nullptr; /// True if the call must be tail call optimized. @@ -259,7 +262,7 @@ public: /// handle the appropriate COPY (either to or from) and mark any /// relevant uses/defines as needed. virtual void assignValueToReg(Register ValVReg, Register PhysReg, - CCValAssign &VA) = 0; + CCValAssign VA) = 0; /// The specified value has been assigned to a stack /// location. Load or store it there, with appropriate extension @@ -279,11 +282,14 @@ public: } /// Handle custom values, which may be passed into one or more of \p VAs. + /// \p If the handler wants the assignments to be delayed until after + /// mem loc assignments, then it sets \p Thunk to the thunk to do the + /// assignment. /// \return The number of \p VAs that have been assigned after the first /// one, and which should therefore be skipped from further /// processing. - virtual unsigned assignCustomValue(ArgInfo &Arg, - ArrayRef VAs) { + virtual unsigned assignCustomValue(ArgInfo &Arg, ArrayRef VAs, + std::function *Thunk = nullptr) { // This is not a pure virtual method because not all targets need to worry // about custom values. llvm_unreachable("Custom values not supported"); @@ -315,7 +321,7 @@ public: /// Provides a default implementation for argument handling. void assignValueToReg(Register ValVReg, Register PhysReg, - CCValAssign &VA) override; + CCValAssign VA) override; }; /// Base class for ValueHandlers used for arguments passed to a function call, diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h index 56459b68dce0..ff4ad4b72636 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h @@ -36,7 +36,10 @@ class GISelKnownBits; class MachineDominatorTree; class LegalizerInfo; struct LegalityQuery; +class RegisterBank; +class RegisterBankInfo; class TargetLowering; +class TargetRegisterInfo; struct PreferredTuple { LLT Ty; // The result type of the extend. 
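Several hunks back, the BasicTTIImpl.h changes replace the generic BAD_ICMP_PREDICATE costing of sadd_with_overflow / ssub_with_overflow with predicate-specific compares, based on the identities Overflow -> (Result < LHS) ^ (RHS < 0) for signed add and Overflow -> (Result < LHS) ^ (RHS > 0) for signed sub. Below is a standalone sketch that checks the add form of that identity, assuming two's-complement wrapping and using the GCC/Clang builtin __builtin_add_overflow purely as a reference oracle; the helper name and test values are illustrative:

#include <cassert>
#include <cstdint>

// Wrapping two's-complement add: signed overflow is UB in C++, so compute the
// wrapped result through unsigned arithmetic and convert back (modular
// conversion, well-defined on the usual targets and since C++20).
static int32_t wrappingAdd(int32_t LHS, int32_t RHS) {
  return static_cast<int32_t>(static_cast<uint32_t>(LHS) +
                              static_cast<uint32_t>(RHS));
}

int main() {
  const int32_t Vals[] = {0, 1, -1, 100, -100, INT32_MAX, INT32_MIN};
  for (int32_t LHS : Vals)
    for (int32_t RHS : Vals) {
      int32_t Result = wrappingAdd(LHS, RHS);
      // The identity from the cost-model comment: overflow happened exactly
      // when the wrapped result moved the "wrong way" relative to RHS's sign.
      bool OverflowByIdentity = (Result < LHS) ^ (RHS < 0);
      int32_t Ignored;
      assert(OverflowByIdentity == __builtin_add_overflow(LHS, RHS, &Ignored));
    }
  return 0;
}

This identity is also why the updated costing for sadd_with_overflow charges one add, two compares, and one xor in place of the previous three compares, two selects, and an and.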
@@ -54,6 +57,7 @@ struct IndexedLoadStoreMatchInfo { struct PtrAddChain { int64_t Imm; Register Base; + const RegisterBank *Bank; }; struct RegisterImmPair { @@ -68,6 +72,16 @@ struct ShiftOfShiftedLogic { uint64_t ValSum; }; +using BuildFnTy = std::function; + +struct MergeTruncStoresInfo { + SmallVector FoundStores; + GStore *LowestIdxStore = nullptr; + Register WideSrcVal; + bool NeedBSwap = false; + bool NeedRotate = false; +}; + using OperandBuildSteps = SmallVector, 4>; struct InstructionBuildSteps { @@ -95,6 +109,8 @@ protected: GISelKnownBits *KB; MachineDominatorTree *MDT; const LegalizerInfo *LI; + const RegisterBankInfo *RBI; + const TargetRegisterInfo *TRI; public: CombinerHelper(GISelChangeObserver &Observer, MachineIRBuilder &B, @@ -120,6 +136,22 @@ public: void replaceRegOpWith(MachineRegisterInfo &MRI, MachineOperand &FromRegOp, Register ToReg) const; + /// Replace the opcode in instruction with a new opcode and inform the + /// observer of the changes. + void replaceOpcodeWith(MachineInstr &FromMI, unsigned ToOpcode) const; + + /// Get the register bank of \p Reg. + /// If Reg has not been assigned a register, a register class, + /// or a register bank, then this returns nullptr. + /// + /// \pre Reg.isValid() + const RegisterBank *getRegBank(Register Reg) const; + + /// Set the register bank of \p Reg. + /// Does nothing if the RegBank is null. + /// This is the counterpart to getRegBank. + void setRegBank(Register Reg, const RegisterBank *RegBank); + /// If \p MI is COPY, try to combine it. /// Returns true if MI changed. bool tryCombineCopy(MachineInstr &MI); @@ -144,6 +176,9 @@ public: bool matchCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo); void applyCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo); + /// Match (and (load x), mask) -> zextload x + bool matchCombineLoadWithAndMask(MachineInstr &MI, BuildFnTy &MatchInfo); + /// Combine \p MI into a pre-indexed or post-indexed load/store operation if /// legal and the surrounding code makes it useful. bool tryCombineIndexedLoadStore(MachineInstr &MI); @@ -341,6 +376,9 @@ public: bool matchCombineFAbsOfFAbs(MachineInstr &MI, Register &Src); void applyCombineFAbsOfFAbs(MachineInstr &MI, Register &Src); + /// Transform fabs(fneg(x)) to fabs(x). + bool matchCombineFAbsOfFNeg(MachineInstr &MI, BuildFnTy &MatchInfo); + /// Transform trunc ([asz]ext x) to x or ([asz]ext x) or (trunc x). bool matchCombineTruncOfExt(MachineInstr &MI, std::pair &MatchInfo); @@ -445,7 +483,7 @@ public: /// Fold and(and(x, C1), C2) -> C1&C2 ? and(x, C1&C2) : 0 bool matchOverlappingAnd(MachineInstr &MI, - std::function &MatchInfo); + BuildFnTy &MatchInfo); /// \return true if \p MI is a G_AND instruction whose operands are x and y /// where x & y == x or x & y == y. (E.g., one of operands is all-ones value.) @@ -501,8 +539,10 @@ public: /// /// And check if the tree can be replaced with a M-bit load + possibly a /// bswap. - bool matchLoadOrCombine(MachineInstr &MI, - std::function &MatchInfo); + bool matchLoadOrCombine(MachineInstr &MI, BuildFnTy &MatchInfo); + + bool matchTruncStoreMerge(MachineInstr &MI, MergeTruncStoresInfo &MatchInfo); + void applyTruncStoreMerge(MachineInstr &MI, MergeTruncStoresInfo &MatchInfo); bool matchExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI); void applyExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI); @@ -519,12 +559,10 @@ public: /// Use a function which takes in a MachineIRBuilder to perform a combine. 
/// By default, it erases the instruction \p MI from the function. - void applyBuildFn(MachineInstr &MI, - std::function &MatchInfo); + void applyBuildFn(MachineInstr &MI, BuildFnTy &MatchInfo); /// Use a function which takes in a MachineIRBuilder to perform a combine. /// This variant does not erase \p MI after calling the build function. - void applyBuildFnNoErase(MachineInstr &MI, - std::function &MatchInfo); + void applyBuildFnNoErase(MachineInstr &MI, BuildFnTy &MatchInfo); bool matchFunnelShiftToRotate(MachineInstr &MI); void applyFunnelShiftToRotate(MachineInstr &MI); @@ -535,21 +573,57 @@ public: /// or false constant based off of KnownBits information. bool matchICmpToTrueFalseKnownBits(MachineInstr &MI, int64_t &MatchInfo); - bool matchBitfieldExtractFromSExtInReg( - MachineInstr &MI, std::function &MatchInfo); - /// Match: and (lshr x, cst), mask -> ubfx x, cst, width - bool matchBitfieldExtractFromAnd( - MachineInstr &MI, std::function &MatchInfo); + /// \returns true if a G_ICMP \p MI can be replaced with its LHS based off of + /// KnownBits information. + bool + matchICmpToLHSKnownBits(MachineInstr &MI, + BuildFnTy &MatchInfo); + + /// \returns true if (and (or x, c1), c2) can be replaced with (and x, c2) + bool matchAndOrDisjointMask(MachineInstr &MI, BuildFnTy &MatchInfo); + bool matchBitfieldExtractFromSExtInReg(MachineInstr &MI, + BuildFnTy &MatchInfo); + /// Match: and (lshr x, cst), mask -> ubfx x, cst, width + bool matchBitfieldExtractFromAnd(MachineInstr &MI, BuildFnTy &MatchInfo); + + /// Match: shr (shl x, n), k -> sbfx/ubfx x, pos, width + bool matchBitfieldExtractFromShr(MachineInstr &MI, BuildFnTy &MatchInfo); + + /// Match: shr (and x, n), k -> ubfx x, pos, width + bool matchBitfieldExtractFromShrAnd(MachineInstr &MI, BuildFnTy &MatchInfo); + + // Helpers for reassociation: + bool matchReassocConstantInnerRHS(GPtrAdd &MI, MachineInstr *RHS, + BuildFnTy &MatchInfo); + bool matchReassocFoldConstantsInSubTree(GPtrAdd &MI, MachineInstr *LHS, + MachineInstr *RHS, + BuildFnTy &MatchInfo); + bool matchReassocConstantInnerLHS(GPtrAdd &MI, MachineInstr *LHS, + MachineInstr *RHS, BuildFnTy &MatchInfo); /// Reassociate pointer calculations with G_ADD involved, to allow better /// addressing mode usage. - bool matchReassocPtrAdd(MachineInstr &MI, - std::function &MatchInfo); - + bool matchReassocPtrAdd(MachineInstr &MI, BuildFnTy &MatchInfo); /// Do constant folding when opportunities are exposed after MIR building. bool matchConstantFold(MachineInstr &MI, APInt &MatchInfo); + /// \returns true if it is possible to narrow the width of a scalar binop + /// feeding a G_AND instruction \p MI. + bool matchNarrowBinopFeedingAnd(MachineInstr &MI, BuildFnTy &MatchInfo); + + /// Given an G_UDIV \p MI expressing a divide by constant, return an + /// expression that implements it by multiplying by a magic number. + /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide". + MachineInstr *buildUDivUsingMul(MachineInstr &MI); + /// Combine G_UDIV by constant into a multiply by magic constant. + bool matchUDivByConst(MachineInstr &MI); + void applyUDivByConst(MachineInstr &MI); + + // G_UMULH x, (1 << c)) -> x >> (bitwidth - c) + bool matchUMulHToLShr(MachineInstr &MI); + void applyUMulHToLShr(MachineInstr &MI); + /// Try to transform \p MI by using all of the above /// combine functions. Returns true if changed. 
bool tryCombine(MachineInstr &MI); @@ -560,20 +634,21 @@ public: /// and rename: s/bool tryEmit/void emit/ bool tryEmitMemcpyInline(MachineInstr &MI); -private: - // Memcpy family optimization helpers. - bool tryEmitMemcpyInline(MachineInstr &MI, Register Dst, Register Src, - uint64_t KnownLen, Align DstAlign, Align SrcAlign, - bool IsVolatile); - bool optimizeMemcpy(MachineInstr &MI, Register Dst, Register Src, - uint64_t KnownLen, uint64_t Limit, Align DstAlign, - Align SrcAlign, bool IsVolatile); - bool optimizeMemmove(MachineInstr &MI, Register Dst, Register Src, - uint64_t KnownLen, Align DstAlign, Align SrcAlign, - bool IsVolatile); - bool optimizeMemset(MachineInstr &MI, Register Dst, Register Val, - uint64_t KnownLen, Align DstAlign, bool IsVolatile); + /// Match: + /// (G_UMULO x, 2) -> (G_UADDO x, x) + /// (G_SMULO x, 2) -> (G_SADDO x, x) + bool matchMulOBy2(MachineInstr &MI, BuildFnTy &MatchInfo); + /// Transform (fadd x, fneg(y)) -> (fsub x, y) + /// (fadd fneg(x), y) -> (fsub y, x) + /// (fsub x, fneg(y)) -> (fadd x, y) + /// (fmul fneg(x), fneg(y)) -> (fmul x, y) + /// (fdiv fneg(x), fneg(y)) -> (fdiv x, y) + /// (fmad fneg(x), fneg(y), z) -> (fmad x, y, z) + /// (fma fneg(x), fneg(y), z) -> (fma x, y, z) + bool matchRedundantNegOperands(MachineInstr &MI, BuildFnTy &MatchInfo); + +private: /// Given a non-indexed load or store instruction \p MI, find an offset that /// can be usefully and legally folded into it as a post-indexing operation. /// diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h index 1162134b2ad2..7103656365b1 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h @@ -57,9 +57,9 @@ public: bool isUnordered() const { return getMMO().isUnordered(); } /// Returns the size in bytes of the memory access. - uint64_t getMemSize() { return getMMO().getSize(); + uint64_t getMemSize() const { return getMMO().getSize(); } /// Returns the size in bits of the memory access. - uint64_t getMemSizeInBits() { return getMMO().getSizeInBits(); } + uint64_t getMemSizeInBits() const { return getMMO().getSizeInBits(); } static bool classof(const MachineInstr *MI) { switch (MI->getOpcode()) { @@ -195,6 +195,37 @@ public: } }; +/// Represents a G_PTR_ADD. +class GPtrAdd : public GenericMachineInstr { +public: + Register getBaseReg() const { return getReg(1); } + Register getOffsetReg() const { return getReg(2); } + + static bool classof(const MachineInstr *MI) { + return MI->getOpcode() == TargetOpcode::G_PTR_ADD; + } +}; + +/// Represents a G_IMPLICIT_DEF. +class GImplicitDef : public GenericMachineInstr { +public: + static bool classof(const MachineInstr *MI) { + return MI->getOpcode() == TargetOpcode::G_IMPLICIT_DEF; + } +}; + +/// Represents a G_SELECT. 
+class GSelect : public GenericMachineInstr { +public: + Register getCondReg() const { return getReg(1); } + Register getTrueReg() const { return getReg(2); } + Register getFalseReg() const { return getReg(3); } + + static bool classof(const MachineInstr *MI) { + return MI->getOpcode() == TargetOpcode::G_SELECT; + } +}; + } // namespace llvm -#endif // LLVM_CODEGEN_GLOBALISEL_GENERICMACHINEINSTRS_H \ No newline at end of file +#endif // LLVM_CODEGEN_GLOBALISEL_GENERICMACHINEINSTRS_H diff --git a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h index 8eab8a5846a7..ebe16cd4f58c 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h @@ -20,6 +20,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/CodeGenCommonISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -466,9 +467,8 @@ private: bool translateSIToFP(const User &U, MachineIRBuilder &MIRBuilder) { return translateCast(TargetOpcode::G_SITOFP, U, MIRBuilder); } - bool translateUnreachable(const User &U, MachineIRBuilder &MIRBuilder) { - return true; - } + bool translateUnreachable(const User &U, MachineIRBuilder &MIRBuilder); + bool translateSExt(const User &U, MachineIRBuilder &MIRBuilder) { return translateCast(TargetOpcode::G_SEXT, U, MIRBuilder); } @@ -586,6 +586,8 @@ private: /// stop translating such blocks early. bool HasTailCall = false; + StackProtectorDescriptor SPDescriptor; + /// Switch analysis and optimization. class GISelSwitchLowering : public SwitchCG::SwitchLowering { public: @@ -614,8 +616,34 @@ private: // * Clear the different maps. void finalizeFunction(); - // Handle emitting jump tables for each basic block. - void finalizeBasicBlock(); + // Processing steps done per block. E.g. emitting jump tables, stack + // protectors etc. Returns true if no errors, false if there was a problem + // that caused an abort. + bool finalizeBasicBlock(const BasicBlock &BB, MachineBasicBlock &MBB); + + /// Codegen a new tail for a stack protector check ParentMBB which has had its + /// tail spliced into a stack protector check success bb. + /// + /// For a high level explanation of how this fits into the stack protector + /// generation see the comment on the declaration of class + /// StackProtectorDescriptor. + /// + /// \return true if there were no problems. + bool emitSPDescriptorParent(StackProtectorDescriptor &SPD, + MachineBasicBlock *ParentBB); + + /// Codegen the failure basic block for a stack protector check. + /// + /// A failure stack protector machine basic block consists simply of a call to + /// __stack_chk_fail(). + /// + /// For a high level explanation of how this fits into the stack protector + /// generation see the comment on the declaration of class + /// StackProtectorDescriptor. + /// + /// \return true if there were no problems. + bool emitSPDescriptorFailure(StackProtectorDescriptor &SPD, + MachineBasicBlock *FailureBB); /// Get the VRegs that represent \p Val. 
/// Non-aggregate types have just one corresponding VReg and the list can be diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegacyLegalizerInfo.h b/llvm/include/llvm/CodeGen/GlobalISel/LegacyLegalizerInfo.h index b1f2103da309..f6704df3f49d 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegacyLegalizerInfo.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegacyLegalizerInfo.h @@ -478,4 +478,4 @@ private: } // end namespace llvm -#endif // define LLVM_CODEGEN_GLOBALISEL_LEGACYLEGALIZERINFO_H +#endif // LLVM_CODEGEN_GLOBALISEL_LEGACYLEGALIZERINFO_H diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h index 44a48927d35a..8a603de2f91d 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h @@ -15,6 +15,7 @@ #define LLVM_CODEGEN_GLOBALISEL_LEGALIZATIONARTIFACTCOMBINER_H #include "llvm/ADT/SmallBitVector.h" +#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" #include "llvm/CodeGen/GlobalISel/Legalizer.h" #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" @@ -22,6 +23,7 @@ #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Register.h" #include "llvm/Support/Debug.h" #define DEBUG_TYPE "legalizer" @@ -52,7 +54,8 @@ public: bool tryCombineAnyExt(MachineInstr &MI, SmallVectorImpl &DeadInsts, - SmallVectorImpl &UpdatedDefs) { + SmallVectorImpl &UpdatedDefs, + GISelObserverWrapper &Observer) { assert(MI.getOpcode() == TargetOpcode::G_ANYEXT); Builder.setInstrAndDebugLoc(MI); @@ -63,7 +66,11 @@ public: Register TruncSrc; if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) { LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;); - Builder.buildAnyExtOrTrunc(DstReg, TruncSrc); + if (MRI.getType(DstReg) == MRI.getType(TruncSrc)) + replaceRegOrBuildCopy(DstReg, TruncSrc, MRI, Builder, UpdatedDefs, + Observer); + else + Builder.buildAnyExtOrTrunc(DstReg, TruncSrc); UpdatedDefs.push_back(DstReg); markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts); return true; @@ -120,12 +127,14 @@ public: return false; LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;); LLT SrcTy = MRI.getType(SrcReg); - APInt MaskVal = APInt::getAllOnesValue(SrcTy.getScalarSizeInBits()); + APInt MaskVal = APInt::getAllOnes(SrcTy.getScalarSizeInBits()); auto Mask = Builder.buildConstant( DstTy, MaskVal.zext(DstTy.getScalarSizeInBits())); - auto Extended = SextSrc ? Builder.buildSExtOrTrunc(DstTy, SextSrc) : - Builder.buildAnyExtOrTrunc(DstTy, TruncSrc); - Builder.buildAnd(DstReg, Extended, Mask); + if (SextSrc && (DstTy != MRI.getType(SextSrc))) + SextSrc = Builder.buildSExtOrTrunc(DstTy, SextSrc).getReg(0); + if (TruncSrc && (DstTy != MRI.getType(TruncSrc))) + TruncSrc = Builder.buildAnyExtOrTrunc(DstTy, TruncSrc).getReg(0); + Builder.buildAnd(DstReg, SextSrc ? SextSrc : TruncSrc, Mask); markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts); return true; } @@ -176,9 +185,9 @@ public: LLVM_DEBUG(dbgs() << ".. 
Combine MI: " << MI;); LLT SrcTy = MRI.getType(SrcReg); uint64_t SizeInBits = SrcTy.getScalarSizeInBits(); - Builder.buildInstr( - TargetOpcode::G_SEXT_INREG, {DstReg}, - {Builder.buildAnyExtOrTrunc(DstTy, TruncSrc), SizeInBits}); + if (DstTy != MRI.getType(TruncSrc)) + TruncSrc = Builder.buildAnyExtOrTrunc(DstTy, TruncSrc).getReg(0); + Builder.buildSExtInReg(DstReg, TruncSrc, SizeInBits); markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts); return true; } @@ -544,12 +553,14 @@ public: MachineIRBuilder &MIB; const LegalizerInfo &LI; - private: + // Stores the best register found in the current query so far. + Register CurrentBest = Register(); + /// Given an concat_vector op \p Concat and a start bit and size, try to /// find the origin of the value defined by that start position and size. /// - /// \returns A register if a value can be found, otherwise an empty - /// Register. + /// \returns a register with the requested size, or the current best + /// register found during the current query. Register findValueFromConcat(GConcatVectors &Concat, unsigned StartBit, unsigned Size) { assert(Size > 0); @@ -566,22 +577,22 @@ public: // FIXME: we might be able return multiple sources? Or create an // appropriate concat to make it fit. if (InRegOffset + Size > SrcSize) - return Register(); + return CurrentBest; - // If the bits exactly cover a single source, then return the operand as - // our value reg. Register SrcReg = Concat.getReg(StartSrcIdx); - if (InRegOffset == 0 && Size == SrcSize) - return SrcReg; // A source operand matches exactly. + if (InRegOffset == 0 && Size == SrcSize) { + CurrentBest = SrcReg; + return findValueFromDefImpl(SrcReg, 0, Size); + } - return findValueFromDef(SrcReg, InRegOffset, Size); + return findValueFromDefImpl(SrcReg, InRegOffset, Size); } /// Given an build_vector op \p BV and a start bit and size, try to find /// the origin of the value defined by that start position and size. /// - /// \returns A register if a value can be found, otherwise an empty - /// Register. + /// \returns a register with the requested size, or the current best + /// register found during the current query. Register findValueFromBuildVector(GBuildVector &BV, unsigned StartBit, unsigned Size) { assert(Size > 0); @@ -596,17 +607,21 @@ public: unsigned InRegOffset = StartBit % SrcSize; if (InRegOffset != 0) - return Register(); // Give up, bits don't start at a scalar source. + return CurrentBest; // Give up, bits don't start at a scalar source. if (Size < SrcSize) - return Register(); // Scalar source is too large for requested bits. + return CurrentBest; // Scalar source is too large for requested bits. // If the bits cover multiple sources evenly, then create a new // build_vector to synthesize the required size, if that's been requested. if (Size > SrcSize) { if (Size % SrcSize > 0) - return Register(); // Isn't covered exactly by sources. + return CurrentBest; // Isn't covered exactly by sources. unsigned NumSrcsUsed = Size / SrcSize; + // If we're requesting all of the sources, just return this def. 
+ if (NumSrcsUsed == BV.getNumSources()) + return BV.getReg(0); + LLT SrcTy = MRI.getType(Src1Reg); LLT NewBVTy = LLT::fixed_vector(NumSrcsUsed, SrcTy); @@ -614,7 +629,7 @@ public: LegalizeActionStep ActionStep = LI.getAction({TargetOpcode::G_BUILD_VECTOR, {NewBVTy, SrcTy}}); if (ActionStep.Action != LegalizeActions::Legal) - return Register(); + return CurrentBest; SmallVector NewSrcs; for (unsigned SrcIdx = StartSrcIdx; SrcIdx < StartSrcIdx + NumSrcsUsed; @@ -630,8 +645,8 @@ public: /// Given an G_INSERT op \p MI and a start bit and size, try to find /// the origin of the value defined by that start position and size. /// - /// \returns A register if a value can be found, otherwise an empty - /// Register. + /// \returns a register with the requested size, or the current best + /// register found during the current query. Register findValueFromInsert(MachineInstr &MI, unsigned StartBit, unsigned Size) { assert(MI.getOpcode() == TargetOpcode::G_INSERT); @@ -685,28 +700,25 @@ public: if (EndBit <= InsertOffset || InsertedEndBit <= StartBit) { SrcRegToUse = ContainerSrcReg; NewStartBit = StartBit; - return findValueFromDef(SrcRegToUse, NewStartBit, Size); + return findValueFromDefImpl(SrcRegToUse, NewStartBit, Size); } if (InsertOffset <= StartBit && EndBit <= InsertedEndBit) { SrcRegToUse = InsertedReg; NewStartBit = StartBit - InsertOffset; - return findValueFromDef(SrcRegToUse, NewStartBit, Size); + if (NewStartBit == 0 && + Size == MRI.getType(SrcRegToUse).getSizeInBits()) + CurrentBest = SrcRegToUse; + return findValueFromDefImpl(SrcRegToUse, NewStartBit, Size); } // The bit range spans both the inserted and container regions. return Register(); } - public: - ArtifactValueFinder(MachineRegisterInfo &Mri, MachineIRBuilder &Builder, - const LegalizerInfo &Info) - : MRI(Mri), MIB(Builder), LI(Info) {} - - /// Try to find a source of the value defined in the def \p DefReg, starting - /// at position \p StartBit with size \p Size. - /// \returns an empty Register if no value could be found, or \p DefReg if - /// if that was the best we could do. - Register findValueFromDef(Register DefReg, unsigned StartBit, - unsigned Size) { + /// Internal implementation for findValueFromDef(). findValueFromDef() + /// initializes some data like the CurrentBest register, which this method + /// and its callees rely upon. + Register findValueFromDefImpl(Register DefReg, unsigned StartBit, + unsigned Size) { MachineInstr *Def = getDefIgnoringCopies(DefReg, MRI); // If the instruction has a single def, then simply delegate the search. // For unmerge however with multiple defs, we need to compute the offset @@ -724,7 +736,7 @@ public: } Register SrcReg = Def->getOperand(Def->getNumOperands() - 1).getReg(); Register SrcOriginReg = - findValueFromDef(SrcReg, StartBit + DefStartBit, Size); + findValueFromDefImpl(SrcReg, StartBit + DefStartBit, Size); if (SrcOriginReg) return SrcOriginReg; // Failed to find a further value. If the StartBit and Size perfectly @@ -732,7 +744,7 @@ public: // nothing. 
if (StartBit == 0 && Size == DefSize) return DefReg; - return Register(); + return CurrentBest; } case TargetOpcode::G_BUILD_VECTOR: return findValueFromBuildVector(cast(*Def), StartBit, @@ -740,41 +752,48 @@ public: case TargetOpcode::G_INSERT: return findValueFromInsert(*Def, StartBit, Size); default: - return Register(); + return CurrentBest; } } - }; - bool tryCombineUnmergeValues(GUnmerge &MI, - SmallVectorImpl &DeadInsts, - SmallVectorImpl &UpdatedDefs, - GISelChangeObserver &Observer) { - unsigned NumDefs = MI.getNumDefs(); - Register SrcReg = MI.getSourceReg(); - MachineInstr *SrcDef = getDefIgnoringCopies(SrcReg, MRI); - if (!SrcDef) - return false; - - LLT OpTy = MRI.getType(SrcReg); - LLT DestTy = MRI.getType(MI.getReg(0)); - unsigned SrcDefIdx = getDefIndex(*SrcDef, SrcReg); + public: + ArtifactValueFinder(MachineRegisterInfo &Mri, MachineIRBuilder &Builder, + const LegalizerInfo &Info) + : MRI(Mri), MIB(Builder), LI(Info) {} - Builder.setInstrAndDebugLoc(MI); + /// Try to find a source of the value defined in the def \p DefReg, starting + /// at position \p StartBit with size \p Size. + /// \returns a register with the requested size, or an empty Register if no + /// better value could be found. + Register findValueFromDef(Register DefReg, unsigned StartBit, + unsigned Size) { + CurrentBest = Register(); + Register FoundReg = findValueFromDefImpl(DefReg, StartBit, Size); + return FoundReg != DefReg ? FoundReg : Register(); + } - auto tryCombineViaValueFinder = [&]() { - ArtifactValueFinder ValueFinder(MRI, Builder, LI); + /// Try to combine the defs of an unmerge \p MI by attempting to find + /// values that provides the bits for each def reg. + /// \returns true if all the defs of the unmerge have been made dead. + bool tryCombineUnmergeDefs(GUnmerge &MI, GISelChangeObserver &Observer, + SmallVectorImpl &UpdatedDefs) { + unsigned NumDefs = MI.getNumDefs(); + LLT DestTy = MRI.getType(MI.getReg(0)); SmallBitVector DeadDefs(NumDefs); for (unsigned DefIdx = 0; DefIdx < NumDefs; ++DefIdx) { Register DefReg = MI.getReg(DefIdx); - Register FoundVal = - ValueFinder.findValueFromDef(DefReg, 0, DestTy.getSizeInBits()); - if (!FoundVal || FoundVal == DefReg) + if (MRI.use_nodbg_empty(DefReg)) { + DeadDefs[DefIdx] = true; + continue; + } + Register FoundVal = findValueFromDef(DefReg, 0, DestTy.getSizeInBits()); + if (!FoundVal) continue; if (MRI.getType(FoundVal) != DestTy) continue; - replaceRegOrBuildCopy(DefReg, FoundVal, MRI, Builder, UpdatedDefs, + replaceRegOrBuildCopy(DefReg, FoundVal, MRI, MIB, UpdatedDefs, Observer); // We only want to replace the uses, not the def of the old reg. 
Observer.changingInstr(MI); @@ -782,12 +801,31 @@ public: Observer.changedInstr(MI); DeadDefs[DefIdx] = true; } - if (DeadDefs.all()) { - markInstAndDefDead(MI, *SrcDef, DeadInsts, SrcDefIdx); - return true; - } + return DeadDefs.all(); + } + }; + + bool tryCombineUnmergeValues(GUnmerge &MI, + SmallVectorImpl &DeadInsts, + SmallVectorImpl &UpdatedDefs, + GISelChangeObserver &Observer) { + unsigned NumDefs = MI.getNumDefs(); + Register SrcReg = MI.getSourceReg(); + MachineInstr *SrcDef = getDefIgnoringCopies(SrcReg, MRI); + if (!SrcDef) return false; - }; + + LLT OpTy = MRI.getType(SrcReg); + LLT DestTy = MRI.getType(MI.getReg(0)); + unsigned SrcDefIdx = getDefIndex(*SrcDef, SrcReg); + + Builder.setInstrAndDebugLoc(MI); + + ArtifactValueFinder Finder(MRI, Builder, LI); + if (Finder.tryCombineUnmergeDefs(MI, Observer, UpdatedDefs)) { + markInstAndDefDead(MI, *SrcDef, DeadInsts, SrcDefIdx); + return true; + } if (auto *SrcUnmerge = dyn_cast(SrcDef)) { // %0:_(<4 x s16>) = G_FOO @@ -813,7 +851,7 @@ public: return false; break; default: - return tryCombineViaValueFinder(); + return false; } auto NewUnmerge = Builder.buildUnmerge(DestTy, SrcUnmergeSrc); @@ -845,11 +883,7 @@ public: ConvertOp, OpTy, DestTy)) { // We might have a chance to combine later by trying to combine // unmerge(cast) first - if (tryFoldUnmergeCast(MI, *SrcDef, DeadInsts, UpdatedDefs)) - return true; - - // Try using the value finder. - return tryCombineViaValueFinder(); + return tryFoldUnmergeCast(MI, *SrcDef, DeadInsts, UpdatedDefs); } const unsigned NumMergeRegs = MergeI->getNumOperands() - 1; @@ -1042,7 +1076,7 @@ public: default: return false; case TargetOpcode::G_ANYEXT: - Changed = tryCombineAnyExt(MI, DeadInsts, UpdatedDefs); + Changed = tryCombineAnyExt(MI, DeadInsts, UpdatedDefs, WrapperObserver); break; case TargetOpcode::G_ZEXT: Changed = tryCombineZExt(MI, DeadInsts, UpdatedDefs, WrapperObserver); diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h index 67141f3a6326..74615c73741a 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h @@ -256,6 +256,20 @@ private: LLT SrcTy, LLT NarrowTy, unsigned ScalarOpc); + // Memcpy family legalization helpers. + LegalizeResult lowerMemset(MachineInstr &MI, Register Dst, Register Val, + uint64_t KnownLen, Align Alignment, + bool IsVolatile); + LegalizeResult lowerMemcpyInline(MachineInstr &MI, Register Dst, Register Src, + uint64_t KnownLen, Align DstAlign, + Align SrcAlign, bool IsVolatile); + LegalizeResult lowerMemcpy(MachineInstr &MI, Register Dst, Register Src, + uint64_t KnownLen, uint64_t Limit, Align DstAlign, + Align SrcAlign, bool IsVolatile); + LegalizeResult lowerMemmove(MachineInstr &MI, Register Dst, Register Src, + uint64_t KnownLen, Align DstAlign, Align SrcAlign, + bool IsVolatile); + public: /// Return the alignment to use for a stack temporary object with the given /// type. 
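The memcpy-family helpers declared in the hunk above move the old combiner-side memory optimizations into LegalizerHelper lowerings. A minimal sketch of how a caller could funnel the generic memory opcodes into them, assuming only the declarations in this header; the free-function framing and the zero MaxLen value are illustrative, not the pass's actual driver:

  #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
  #include "llvm/CodeGen/MachineInstr.h"
  #include "llvm/CodeGen/TargetOpcodes.h"

  using namespace llvm;

  // Route the generic memory intrinsics to the memcpy-family lowering.
  // Each lowering expands the operation into legal wide loads/stores (or
  // constant stores for G_MEMSET) when the length is a known constant.
  static LegalizerHelper::LegalizeResult
  lowerMemOp(LegalizerHelper &Helper, MachineInstr &MI) {
    switch (MI.getOpcode()) {
    case TargetOpcode::G_MEMCPY:
    case TargetOpcode::G_MEMMOVE:
    case TargetOpcode::G_MEMSET:
      return Helper.lowerMemCpyFamily(MI, /*MaxLen=*/0);
    default:
      return LegalizerHelper::UnableToLegalize;
    }
  }
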
@@ -402,6 +416,9 @@ public: LegalizeResult lowerDIVREM(MachineInstr &MI); LegalizeResult lowerAbsToAddXor(MachineInstr &MI); LegalizeResult lowerAbsToMaxNeg(MachineInstr &MI); + LegalizeResult lowerVectorReduction(MachineInstr &MI); + LegalizeResult lowerMemcpyInline(MachineInstr &MI); + LegalizeResult lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen = 0); }; /// Helper function that creates a libcall to the given \p Name using the given diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h index 4fdfabbfb161..68c14240ebc7 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h @@ -15,8 +15,6 @@ #define LLVM_CODEGEN_GLOBALISEL_LEGALIZERINFO_H #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/None.h" -#include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallVector.h" @@ -113,6 +111,14 @@ struct LegalityQuery { LLT MemoryTy; uint64_t AlignInBits; AtomicOrdering Ordering; + + MemDesc() = default; + MemDesc(LLT MemoryTy, uint64_t AlignInBits, AtomicOrdering Ordering) + : MemoryTy(MemoryTy), AlignInBits(AlignInBits), Ordering(Ordering) {} + MemDesc(const MachineMemOperand &MMO) + : MemoryTy(MMO.getMemoryType()), + AlignInBits(MMO.getAlign().value() * 8), + Ordering(MMO.getSuccessOrdering()) {} }; /// Operations which require memory can use this to place requirements on the @@ -293,6 +299,10 @@ LegalityPredicate scalarOrEltNarrowerThan(unsigned TypeIdx, unsigned Size); /// type that's wider than the given size. LegalityPredicate scalarOrEltWiderThan(unsigned TypeIdx, unsigned Size); +/// True iff the specified type index is a scalar whose size is not a multiple +/// of Size. +LegalityPredicate sizeNotMultipleOf(unsigned TypeIdx, unsigned Size); + /// True iff the specified type index is a scalar whose size is not a power of /// 2. LegalityPredicate sizeNotPow2(unsigned TypeIdx); @@ -348,6 +358,11 @@ LegalizeMutation changeElementSizeTo(unsigned TypeIdx, unsigned FromTypeIdx); /// next power of 2. LegalizeMutation widenScalarOrEltToNextPow2(unsigned TypeIdx, unsigned Min = 0); +/// Widen the scalar type or vector element type for the given type index to +/// next multiple of \p Size. +LegalizeMutation widenScalarOrEltToNextMultipleOf(unsigned TypeIdx, + unsigned Size); + /// Add more elements to the type for the given type index to the next power of /// 2. LegalizeMutation moreElementsToNextPow2(unsigned TypeIdx, unsigned Min = 0); @@ -828,6 +843,16 @@ public: LegalizeMutations::widenScalarOrEltToNextPow2(TypeIdx, MinSize)); } + /// Widen the scalar to the next multiple of Size. No effect if the + /// type is not a scalar or is a multiple of Size. + LegalizeRuleSet &widenScalarToNextMultipleOf(unsigned TypeIdx, + unsigned Size) { + using namespace LegalityPredicates; + return actionIf( + LegalizeAction::WidenScalar, sizeNotMultipleOf(typeIdx(TypeIdx), Size), + LegalizeMutations::widenScalarOrEltToNextMultipleOf(TypeIdx, Size)); + } + /// Widen the scalar or vector element type to the next power of two that is /// at least MinSize. No effect if the scalar size is a power of two. 
LegalizeRuleSet &widenScalarOrEltToNextPow2(unsigned TypeIdx, diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LoadStoreOpt.h b/llvm/include/llvm/CodeGen/GlobalISel/LoadStoreOpt.h new file mode 100644 index 000000000000..29575f386d7a --- /dev/null +++ b/llvm/include/llvm/CodeGen/GlobalISel/LoadStoreOpt.h @@ -0,0 +1,165 @@ +//== llvm/CodeGen/GlobalISel/LoadStoreOpt.h - LoadStoreOpt -------*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// This is an optimization pass for GlobalISel generic memory operations. +/// Specifically, it focuses on merging stores and loads to consecutive +/// addresses. +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_GLOBALISEL_LOADSTOREOPT_H +#define LLVM_CODEGEN_GLOBALISEL_LOADSTOREOPT_H + +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" +#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" +#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" +#include "llvm/CodeGen/GlobalISel/Utils.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" + +namespace llvm { +// Forward declarations. +class MachineRegisterInfo; +class TargetTransformInfo; +namespace GISelAddressing { +/// Helper struct to store a base, index and offset that forms an address +struct BaseIndexOffset { + Register BaseReg; + Register IndexReg; + int64_t Offset = 0; + bool IsIndexSignExt = false; +}; + +/// Returns a BaseIndexOffset which describes the pointer in \p Ptr. +BaseIndexOffset getPointerInfo(Register Ptr, MachineRegisterInfo &MRI); + +/// Compute whether or not a memory access at \p MI1 aliases with an access at +/// \p MI2 \returns true if either alias/no-alias is known. Sets \p IsAlias +/// accordingly. +bool aliasIsKnownForLoadStore(const MachineInstr &MI1, const MachineInstr &MI2, + bool &IsAlias, MachineRegisterInfo &MRI); + +/// Returns true if the instruction \p MI may alias \p Other. +/// This function uses multiple strategies to detect aliasing, whereas +/// aliasIsKnownForLoadStore just looks at the addresses of load/stores and is +/// tries to reason about base/index/offsets. +bool instMayAlias(const MachineInstr &MI, const MachineInstr &Other, + MachineRegisterInfo &MRI, AliasAnalysis *AA); +} // namespace GISelAddressing + +using namespace GISelAddressing; + +class LoadStoreOpt : public MachineFunctionPass { +public: + static char ID; + +private: + /// An input function to decide if the pass should run or not + /// on the given MachineFunction. + std::function DoNotRunPass; + + MachineRegisterInfo *MRI; + const TargetLowering *TLI; + MachineFunction *MF; + AliasAnalysis *AA; + const LegalizerInfo *LI; + + MachineIRBuilder Builder; + + /// Initialize the field members using \p MF. + void init(MachineFunction &MF); + + class StoreMergeCandidate { + public: + // The base pointer used as the base for all stores in this candidate. + Register BasePtr; + // Our algorithm is very simple at the moment. 
We assume that in instruction + // order stores are writing to incremeneting consecutive addresses. So when + // we walk the block in reverse order, the next eligible store must write to + // an offset one store width lower than CurrentLowestOffset. + uint64_t CurrentLowestOffset; + SmallVector Stores; + // A vector of MachineInstr/unsigned pairs to denote potential aliases that + // need to be checked before the candidate is considered safe to merge. The + // unsigned value is an index into the Stores vector. The indexed store is + // the highest-indexed store that has already been checked to not have an + // alias with the instruction. We record this so we don't have to repeat + // alias checks that have been already done, only those with stores added + // after the potential alias is recorded. + SmallVector> PotentialAliases; + + void addPotentialAlias(MachineInstr &MI); + + /// Reset this candidate back to an empty one. + void reset() { + Stores.clear(); + PotentialAliases.clear(); + CurrentLowestOffset = 0; + BasePtr = Register(); + } + }; + + bool isLegalOrBeforeLegalizer(const LegalityQuery &Query, + MachineFunction &MF) const; + /// If the given store is valid to be a member of the candidate, add it and + /// return true. Otherwise, returns false. + bool addStoreToCandidate(GStore &MI, StoreMergeCandidate &C); + /// Returns true if the instruction \p MI would potentially alias with any + /// stores in the candidate \p C. + bool operationAliasesWithCandidate(MachineInstr &MI, StoreMergeCandidate &C); + /// Merges the stores in the given vector into a wide store. + /// \p returns true if at least some of the stores were merged. + /// This may decide not to merge stores if heuristics predict it will not be + /// worth it. + bool mergeStores(SmallVectorImpl &StoresToMerge); + /// Perform a merge of all the stores in \p Stores into a single store. + /// Erases the old stores from the block when finished. + /// \returns true if merging was done. It may fail to perform a merge if + /// there are issues with materializing legal wide values. + bool doSingleStoreMerge(SmallVectorImpl &Stores); + bool processMergeCandidate(StoreMergeCandidate &C); + bool mergeBlockStores(MachineBasicBlock &MBB); + bool mergeFunctionStores(MachineFunction &MF); + + /// Initialize some target-specific data structures for the store merging + /// optimization. \p AddrSpace indicates which address space to use when + /// probing the legalizer info for legal stores. + void initializeStoreMergeTargetInfo(unsigned AddrSpace = 0); + /// A map between address space numbers and a bitvector of supported stores + /// sizes. Each bit in the bitvector represents whether a store size of + /// that bit's value is legal. E.g. if bit 64 is set, then 64 bit scalar + /// stores are legal. + DenseMap LegalStoreSizes; + bool IsPreLegalizer; + /// Contains instructions to be erased at the end of a block scan. + SmallSet InstsToErase; + +public: + LoadStoreOpt(); + LoadStoreOpt(std::function); + + StringRef getPassName() const override { return "LoadStoreOpt"; } + + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties() + .set(MachineFunctionProperties::Property::IsSSA); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override; + + bool runOnMachineFunction(MachineFunction &MF) override; +}; + +} // End namespace llvm. 
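A small usage sketch of the GISelAddressing helpers declared in this new header, relying only on what is declared above (getPointerInfo, BaseIndexOffset, and the GStore accessors); the function name and the exact adjacency rule are assumptions, roughly what the StoreMergeCandidate walk described above depends on:

  #include "llvm/CodeGen/GlobalISel/LoadStoreOpt.h"

  using namespace llvm;
  using namespace llvm::GISelAddressing;

  // Two stores are merge candidates only if they share a base register and
  // the second store begins exactly where the first one ends.
  static bool storesAreConsecutive(GStore &First, GStore &Second,
                                   MachineRegisterInfo &MRI) {
    BaseIndexOffset A = getPointerInfo(First.getPointerReg(), MRI);
    BaseIndexOffset B = getPointerInfo(Second.getPointerReg(), MRI);
    if (A.BaseReg != B.BaseReg)
      return false;
    return B.Offset == A.Offset + (int64_t)First.getMemSize();
  }
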
+ +#endif diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h b/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h index 4c6b47ab9bc8..e813d030eec3 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h @@ -63,7 +63,7 @@ struct ConstantMatch { int64_t &CR; ConstantMatch(int64_t &C) : CR(C) {} bool match(const MachineRegisterInfo &MRI, Register Reg) { - if (auto MaybeCst = getConstantVRegSExtVal(Reg, MRI)) { + if (auto MaybeCst = getIConstantVRegSExtVal(Reg, MRI)) { CR = *MaybeCst; return true; } @@ -73,21 +73,46 @@ struct ConstantMatch { inline ConstantMatch m_ICst(int64_t &Cst) { return ConstantMatch(Cst); } -struct ICstRegMatch { - Register &CR; - ICstRegMatch(Register &C) : CR(C) {} +struct GCstAndRegMatch { + Optional &ValReg; + GCstAndRegMatch(Optional &ValReg) : ValReg(ValReg) {} bool match(const MachineRegisterInfo &MRI, Register Reg) { - if (auto MaybeCst = getConstantVRegValWithLookThrough( - Reg, MRI, /*LookThroughInstrs*/ true, - /*HandleFConstants*/ false)) { - CR = MaybeCst->VReg; - return true; - } - return false; + ValReg = getIConstantVRegValWithLookThrough(Reg, MRI); + return ValReg ? true : false; } }; -inline ICstRegMatch m_ICst(Register &Reg) { return ICstRegMatch(Reg); } +inline GCstAndRegMatch m_GCst(Optional &ValReg) { + return GCstAndRegMatch(ValReg); +} + +struct GFCstAndRegMatch { + Optional &FPValReg; + GFCstAndRegMatch(Optional &FPValReg) : FPValReg(FPValReg) {} + bool match(const MachineRegisterInfo &MRI, Register Reg) { + FPValReg = getFConstantVRegValWithLookThrough(Reg, MRI); + return FPValReg ? true : false; + } +}; + +inline GFCstAndRegMatch m_GFCst(Optional &FPValReg) { + return GFCstAndRegMatch(FPValReg); +} + +struct GFCstOrSplatGFCstMatch { + Optional &FPValReg; + GFCstOrSplatGFCstMatch(Optional &FPValReg) + : FPValReg(FPValReg) {} + bool match(const MachineRegisterInfo &MRI, Register Reg) { + return (FPValReg = getFConstantSplat(Reg, MRI)) || + (FPValReg = getFConstantVRegValWithLookThrough(Reg, MRI)); + }; +}; + +inline GFCstOrSplatGFCstMatch +m_GFCstOrSplat(Optional &FPValReg) { + return GFCstOrSplatGFCstMatch(FPValReg); +} /// Matcher for a specific constant value. 
struct SpecificConstantMatch { diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h index 9b652d8e16bc..069f71b54328 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h @@ -1537,6 +1537,14 @@ public: return buildInstr(TargetOpcode::G_XOR, {Dst}, {Src0, NegOne}); } + /// Build and insert integer negation + /// \p Zero = G_CONSTANT 0 + /// \p Res = G_SUB Zero, \p Op0 + MachineInstrBuilder buildNeg(const DstOp &Dst, const SrcOp &Src0) { + auto Zero = buildConstant(Dst.getLLTTy(*getMRI()), 0); + return buildInstr(TargetOpcode::G_SUB, {Dst}, {Zero, Src0}); + } + /// Build and insert \p Res = G_CTPOP \p Op0, \p Src0 MachineInstrBuilder buildCTPOP(const DstOp &Dst, const SrcOp &Src0) { return buildInstr(TargetOpcode::G_CTPOP, {Dst}, {Src0}); diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h index 818475a48abb..86545b976b8d 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h @@ -14,6 +14,9 @@ #ifndef LLVM_CODEGEN_GLOBALISEL_UTILS_H #define LLVM_CODEGEN_GLOBALISEL_UTILS_H +#include "GISelWorkList.h" +#include "LostDebugLocObserver.h" +#include "llvm/ADT/APFloat.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/Register.h" @@ -44,6 +47,7 @@ class TargetRegisterClass; class ConstantInt; class ConstantFP; class APFloat; +class MachineIRBuilder; // Convenience macros for dealing with vector reduction opcodes. #define GISEL_VECREDUCE_CASES_ALL \ @@ -162,13 +166,12 @@ void reportGISelWarning(MachineFunction &MF, const TargetPassConfig &TPC, MachineOptimizationRemarkMissed &R); /// If \p VReg is defined by a G_CONSTANT, return the corresponding value. -Optional getConstantVRegVal(Register VReg, - const MachineRegisterInfo &MRI); +Optional getIConstantVRegVal(Register VReg, + const MachineRegisterInfo &MRI); -/// If \p VReg is defined by a G_CONSTANT fits in int64_t -/// returns it. -Optional getConstantVRegSExtVal(Register VReg, - const MachineRegisterInfo &MRI); +/// If \p VReg is defined by a G_CONSTANT fits in int64_t returns it. +Optional getIConstantVRegSExtVal(Register VReg, + const MachineRegisterInfo &MRI); /// Simple struct used to hold a constant integer value and a virtual /// register. @@ -176,22 +179,32 @@ struct ValueAndVReg { APInt Value; Register VReg; }; -/// If \p VReg is defined by a statically evaluable chain of -/// instructions rooted on a G_F/CONSTANT (\p LookThroughInstrs == true) -/// and that constant fits in int64_t, returns its value as well as the -/// virtual register defined by this G_F/CONSTANT. -/// When \p LookThroughInstrs == false this function behaves like -/// getConstantVRegVal. -/// When \p HandleFConstants == false the function bails on G_FCONSTANTs. -/// When \p LookThroughAnyExt == true the function treats G_ANYEXT same as -/// G_SEXT. + +/// If \p VReg is defined by a statically evaluable chain of instructions rooted +/// on a G_CONSTANT returns its APInt value and def register. 
Optional -getConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, - bool LookThroughInstrs = true, - bool HandleFConstants = true, - bool LookThroughAnyExt = false); -const ConstantInt *getConstantIntVRegVal(Register VReg, - const MachineRegisterInfo &MRI); +getIConstantVRegValWithLookThrough(Register VReg, + const MachineRegisterInfo &MRI, + bool LookThroughInstrs = true); + +/// If \p VReg is defined by a statically evaluable chain of instructions rooted +/// on a G_CONSTANT or G_FCONSTANT returns its value as APInt and def register. +Optional getAnyConstantVRegValWithLookThrough( + Register VReg, const MachineRegisterInfo &MRI, + bool LookThroughInstrs = true, bool LookThroughAnyExt = false); + +struct FPValueAndVReg { + APFloat Value; + Register VReg; +}; + +/// If \p VReg is defined by a statically evaluable chain of instructions rooted +/// on a G_FCONSTANT returns its APFloat value and def register. +Optional +getFConstantVRegValWithLookThrough(Register VReg, + const MachineRegisterInfo &MRI, + bool LookThroughInstrs = true); + const ConstantFP* getConstantFPVRegVal(Register VReg, const MachineRegisterInfo &MRI); @@ -254,6 +267,14 @@ Optional ConstantFoldFPBinOp(unsigned Opcode, const Register Op1, const Register Op2, const MachineRegisterInfo &MRI); +/// Tries to constant fold a vector binop with sources \p Op1 and \p Op2. +/// If successful, returns the G_BUILD_VECTOR representing the folded vector +/// constant. \p MIB should have an insertion point already set to create new +/// G_CONSTANT instructions as needed. +Optional +ConstantFoldVectorBinop(unsigned Opcode, const Register Op1, const Register Op2, + const MachineRegisterInfo &MRI, MachineIRBuilder &MIB); + Optional ConstantFoldExtOp(unsigned Opcode, const Register Op1, uint64_t Imm, const MachineRegisterInfo &MRI); @@ -261,6 +282,11 @@ Optional ConstantFoldIntToFloat(unsigned Opcode, LLT DstTy, Register Src, const MachineRegisterInfo &MRI); +/// Tries to constant fold a G_CTLZ operation on \p Src. If \p Src is a vector +/// then it tries to do an element-wise constant fold. +Optional> +ConstantFoldCTLZ(Register Src, const MachineRegisterInfo &MRI); + /// Test if the given value is known to have exactly one bit set. This differs /// from computeKnownBits in that it doesn't necessarily determine which bit is /// set. @@ -346,15 +372,23 @@ Optional getSplatIndex(MachineInstr &MI); Optional getBuildVectorConstantSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI); +/// Returns a floating point scalar constant of a build vector splat if it +/// exists. When \p AllowUndef == true some elements can be undef but not all. +Optional getFConstantSplat(Register VReg, + const MachineRegisterInfo &MRI, + bool AllowUndef = true); + /// Return true if the specified instruction is a G_BUILD_VECTOR or /// G_BUILD_VECTOR_TRUNC where all of the elements are 0 or undef. bool isBuildVectorAllZeros(const MachineInstr &MI, - const MachineRegisterInfo &MRI); + const MachineRegisterInfo &MRI, + bool AllowUndef = false); /// Return true if the specified instruction is a G_BUILD_VECTOR or /// G_BUILD_VECTOR_TRUNC where all of the elements are ~0 or undef. bool isBuildVectorAllOnes(const MachineInstr &MI, - const MachineRegisterInfo &MRI); + const MachineRegisterInfo &MRI, + bool AllowUndef = false); /// \returns a value when \p MI is a vector splat. The splat can be either a /// Register or a constant. 
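An illustrative use of the renamed integer-constant helpers above, assuming only the declarations in this header; the predicate itself (a multiply by the literal 2, in the spirit of the G_UMULO/G_SMULO combine added elsewhere in this import) is just an example, not library code:

  #include "llvm/CodeGen/GlobalISel/Utils.h"
  #include "llvm/CodeGen/MachineInstr.h"
  #include "llvm/CodeGen/MachineRegisterInfo.h"
  #include "llvm/CodeGen/TargetOpcodes.h"

  using namespace llvm;

  // Returns true if MI is a G_MUL whose right-hand operand is the constant
  // 2, looking through copies and truncs rooted on a G_CONSTANT.
  static bool isMulByTwo(const MachineInstr &MI,
                         const MachineRegisterInfo &MRI) {
    if (MI.getOpcode() != TargetOpcode::G_MUL)
      return false;
    auto Cst =
        getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
    return Cst && Cst->Value == 2;
  }
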
@@ -378,6 +412,17 @@ bool isBuildVectorAllOnes(const MachineInstr &MI, Optional getVectorSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI); +/// Determines if \p MI defines a constant integer or a build vector of +/// constant integers. Treats undef values as constants. +bool isConstantOrConstantVector(MachineInstr &MI, + const MachineRegisterInfo &MRI); + +/// Determines if \p MI defines a constant integer or a splat vector of +/// constant integers. +/// \returns the scalar constant or None. +Optional isConstantOrConstantSplatVector(MachineInstr &MI, + const MachineRegisterInfo &MRI); + /// Attempt to match a unary predicate against a scalar/splat constant or every /// element of a constant G_BUILD_VECTOR. If \p ConstVal is null, the source /// value was undef. @@ -398,5 +443,14 @@ int64_t getICmpTrueVal(const TargetLowering &TLI, bool IsVector, bool IsFP); bool shouldOptForSize(const MachineBasicBlock &MBB, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI); +using SmallInstListTy = GISelWorkList<4>; +void saveUsesAndErase(MachineInstr &MI, MachineRegisterInfo &MRI, + LostDebugLocObserver *LocObserver, + SmallInstListTy &DeadInstChain); +void eraseInstrs(ArrayRef DeadInstrs, MachineRegisterInfo &MRI, + LostDebugLocObserver *LocObserver = nullptr); +void eraseInstr(MachineInstr &MI, MachineRegisterInfo &MRI, + LostDebugLocObserver *LocObserver = nullptr); + } // End namespace llvm. #endif diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h index 6803f4d76cf0..fd106f55a43d 100644 --- a/llvm/include/llvm/CodeGen/ISDOpcodes.h +++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h @@ -1260,6 +1260,11 @@ static const int FIRST_TARGET_STRICTFP_OPCODE = BUILTIN_OP_END + 400; /// be used with SelectionDAG::getMemIntrinsicNode. static const int FIRST_TARGET_MEMORY_OPCODE = BUILTIN_OP_END + 500; +/// Whether this is bitwise logic opcode. +inline bool isBitwiseLogicOp(unsigned Opcode) { + return Opcode == ISD::AND || Opcode == ISD::OR || Opcode == ISD::XOR; +} + /// Get underlying scalar opcode for VECREDUCE opcode. /// For example ISD::AND for ISD::VECREDUCE_AND. NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode); @@ -1267,6 +1272,12 @@ NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode); /// Whether this is a vector-predicated Opcode. bool isVPOpcode(unsigned Opcode); +/// Whether this is a vector-predicated binary operation opcode. +bool isVPBinaryOp(unsigned Opcode); + +/// Whether this is a vector-predicated reduction opcode. +bool isVPReduction(unsigned Opcode); + /// The operand position of the vector mask. Optional getVPMaskIdx(unsigned Opcode); diff --git a/llvm/include/llvm/CodeGen/IndirectThunks.h b/llvm/include/llvm/CodeGen/IndirectThunks.h index 74973f38bc79..90f9912f0ee0 100644 --- a/llvm/include/llvm/CodeGen/IndirectThunks.h +++ b/llvm/include/llvm/CodeGen/IndirectThunks.h @@ -62,7 +62,7 @@ void ThunkInserter::createThunkFunction(MachineModuleInfo &MMI, AttrBuilder B; B.addAttribute(llvm::Attribute::NoUnwind); B.addAttribute(llvm::Attribute::Naked); - F->addAttributes(llvm::AttributeList::FunctionIndex, B); + F->addFnAttrs(B); // Populate our function a bit so that we can verify. 
BasicBlock *Entry = BasicBlock::Create(Ctx, "entry", F); diff --git a/llvm/include/llvm/CodeGen/LinkAllAsmWriterComponents.h b/llvm/include/llvm/CodeGen/LinkAllAsmWriterComponents.h index 81b0025fdddc..c22f9d49f374 100644 --- a/llvm/include/llvm/CodeGen/LinkAllAsmWriterComponents.h +++ b/llvm/include/llvm/CodeGen/LinkAllAsmWriterComponents.h @@ -24,6 +24,9 @@ namespace { // delete it all as dead code, even with whole program optimization, // yet is effectively a NO-OP. As the compiler isn't smart enough // to know that getenv() never returns -1, this will do the job. + // This is so that globals in the translation units where these functions + // are defined are forced to be initialized, populating various + // registries. if (std::getenv("bar") != (char*) -1) return; diff --git a/llvm/include/llvm/CodeGen/LinkAllCodegenComponents.h b/llvm/include/llvm/CodeGen/LinkAllCodegenComponents.h index 1b13ff53ac85..d615a5db4504 100644 --- a/llvm/include/llvm/CodeGen/LinkAllCodegenComponents.h +++ b/llvm/include/llvm/CodeGen/LinkAllCodegenComponents.h @@ -27,6 +27,9 @@ namespace { // delete it all as dead code, even with whole program optimization, // yet is effectively a NO-OP. As the compiler isn't smart enough // to know that getenv() never returns -1, this will do the job. + // This is so that globals in the translation units where these functions + // are defined are forced to be initialized, populating various + // registries. if (std::getenv("bar") != (char*) -1) return; diff --git a/llvm/include/llvm/CodeGen/LiveInterval.h b/llvm/include/llvm/CodeGen/LiveInterval.h index c2b158ac1b7f..923a45821dd4 100644 --- a/llvm/include/llvm/CodeGen/LiveInterval.h +++ b/llvm/include/llvm/CodeGen/LiveInterval.h @@ -521,11 +521,11 @@ namespace llvm { removeSegment(S.start, S.end, RemoveDeadValNo); } - /// Remove segment pointed to by iterator @p I from this range. This does - /// not remove dead value numbers. - iterator removeSegment(iterator I) { - return segments.erase(I); - } + /// Remove segment pointed to by iterator @p I from this range. + iterator removeSegment(iterator I, bool RemoveDeadValNo = false); + + /// Mark \p ValNo for deletion if no segments in this range use it. + void removeValNoIfDead(VNInfo *ValNo); /// Query Liveness at Idx. /// The sub-instruction slot of Idx doesn't matter, only the instruction diff --git a/llvm/include/llvm/CodeGen/LiveIntervalUnion.h b/llvm/include/llvm/CodeGen/LiveIntervalUnion.h index 4ebe0f2dcfd8..3b6a4a379d72 100644 --- a/llvm/include/llvm/CodeGen/LiveIntervalUnion.h +++ b/llvm/include/llvm/CodeGen/LiveIntervalUnion.h @@ -114,12 +114,19 @@ public: const LiveRange *LR = nullptr; LiveRange::const_iterator LRI; ///< current position in LR ConstSegmentIter LiveUnionI; ///< current position in LiveUnion - Optional> InterferingVRegs; + SmallVector InterferingVRegs; bool CheckedFirstInterference = false; bool SeenAllInterferences = false; unsigned Tag = 0; unsigned UserTag = 0; + // Count the virtual registers in this union that interfere with this + // query's live virtual register, up to maxInterferingRegs. + unsigned collectInterferingVRegs(unsigned MaxInterferingRegs); + + // Was this virtual register visited during collectInterferingVRegs? 
+ bool isSeenInterference(LiveInterval *VirtReg) const; + public: Query() = default; Query(const LiveRange &LR, const LiveIntervalUnion &LIU) @@ -131,7 +138,7 @@ public: const LiveIntervalUnion &NewLiveUnion) { LiveUnion = &NewLiveUnion; LR = &NewLR; - InterferingVRegs = None; + InterferingVRegs.clear(); CheckedFirstInterference = false; SeenAllInterferences = false; Tag = NewLiveUnion.getTag(); @@ -151,20 +158,12 @@ public: // Does this live virtual register interfere with the union? bool checkInterference() { return collectInterferingVRegs(1); } - // Count the virtual registers in this union that interfere with this - // query's live virtual register, up to maxInterferingRegs. - unsigned collectInterferingVRegs( - unsigned MaxInterferingRegs = std::numeric_limits::max()); - - // Was this virtual register visited during collectInterferingVRegs? - bool isSeenInterference(LiveInterval *VirtReg) const; - - // Did collectInterferingVRegs collect all interferences? - bool seenAllInterferences() const { return SeenAllInterferences; } - // Vector generated by collectInterferingVRegs. - const SmallVectorImpl &interferingVRegs() const { - return *InterferingVRegs; + const SmallVectorImpl &interferingVRegs( + unsigned MaxInterferingRegs = std::numeric_limits::max()) { + if (!SeenAllInterferences || MaxInterferingRegs < InterferingVRegs.size()) + collectInterferingVRegs(MaxInterferingRegs); + return InterferingVRegs; } }; diff --git a/llvm/include/llvm/CodeGen/LiveVariables.h b/llvm/include/llvm/CodeGen/LiveVariables.h index 9b0667bbbeb0..dee316677b25 100644 --- a/llvm/include/llvm/CodeGen/LiveVariables.h +++ b/llvm/include/llvm/CodeGen/LiveVariables.h @@ -188,6 +188,12 @@ public: //===--------------------------------------------------------------------===// // API to update live variable information + /// Recompute liveness from scratch for a virtual register \p Reg that is + /// known to have a single def that dominates all uses. This can be useful + /// after removing some uses of \p Reg. It is not necessary for the whole + /// machine function to be in SSA form. + void recomputeForSingleDefVirtReg(Register Reg); + /// replaceKillInstruction - Update register kill info by replacing a kill /// instruction with a new one. void replaceKillInstruction(Register Reg, MachineInstr &OldMI, diff --git a/llvm/include/llvm/CodeGen/LowLevelType.h b/llvm/include/llvm/CodeGen/LowLevelType.h index 40985e16b37a..922f93d2e598 100644 --- a/llvm/include/llvm/CodeGen/LowLevelType.h +++ b/llvm/include/llvm/CodeGen/LowLevelType.h @@ -16,8 +16,8 @@ #ifndef LLVM_CODEGEN_LOWLEVELTYPE_H #define LLVM_CODEGEN_LOWLEVELTYPE_H +#include "llvm/CodeGen/ValueTypes.h" #include "llvm/Support/LowLevelTypeImpl.h" -#include "llvm/Support/MachineValueType.h" namespace llvm { @@ -31,6 +31,7 @@ LLT getLLTForType(Type &Ty, const DataLayout &DL); /// Get a rough equivalent of an MVT for a given LLT. MVT can't distinguish /// pointers, so these will convert to a plain integer. MVT getMVTForLLT(LLT Ty); +EVT getApproximateEVTForLLT(LLT Ty, const DataLayout &DL, LLVMContext &Ctx); /// Get a rough equivalent of an LLT for a given MVT. LLT does not yet support /// scalarable vector types, and will assert if used. 
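With the LiveIntervalUnion::Query changes above, collectInterferingVRegs is no longer part of the public interface; interferingVRegs() now performs the (optionally capped) collection on demand. A hedged usage sketch, assuming only the public members shown in this diff; the helper name is an assumption:

  #include "llvm/CodeGen/LiveIntervalUnion.h"

  using namespace llvm;

  // Count interfering virtual registers, stopping once Cap have been seen.
  // checkInterference() is the cheap "any interference at all" form, i.e. a
  // collection capped at one register.
  static unsigned countInterferences(LiveIntervalUnion::Query &Q,
                                     unsigned Cap) {
    if (!Q.checkInterference())
      return 0;
    return static_cast<unsigned>(Q.interferingVRegs(Cap).size());
  }
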
diff --git a/llvm/include/llvm/CodeGen/MIRFSDiscriminator.h b/llvm/include/llvm/CodeGen/MIRFSDiscriminator.h index 6137411b6dba..deb6b37a9bcf 100644 --- a/llvm/include/llvm/CodeGen/MIRFSDiscriminator.h +++ b/llvm/include/llvm/CodeGen/MIRFSDiscriminator.h @@ -57,6 +57,10 @@ public: assert(LowBit < HighBit && "HighBit needs to be greater than Lowbit"); } + StringRef getPassName() const override { + return "Add FS discriminators in MIR"; + } + /// getNumFSBBs() - Return the number of machine BBs that have FS samples. unsigned getNumFSBBs(); diff --git a/llvm/include/llvm/CodeGen/MIRFormatter.h b/llvm/include/llvm/CodeGen/MIRFormatter.h index 9cb92091db50..12c90600f6df 100644 --- a/llvm/include/llvm/CodeGen/MIRFormatter.h +++ b/llvm/include/llvm/CodeGen/MIRFormatter.h @@ -1,9 +1,8 @@ //===-- llvm/CodeGen/MIRFormatter.h -----------------------------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/llvm/include/llvm/CodeGen/MIRSampleProfile.h b/llvm/include/llvm/CodeGen/MIRSampleProfile.h new file mode 100644 index 000000000000..2503524ccfdf --- /dev/null +++ b/llvm/include/llvm/CodeGen/MIRSampleProfile.h @@ -0,0 +1,76 @@ +//===----- MIRSampleProfile.h: SampleFDO Support in MIR ---*- c++ -*-------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains the supoorting functions for machine level Sample FDO +// loader. This is used in Flow Sensitive SampelFDO. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_MIRSAMPLEPROFILE_H +#define LLVM_CODEGEN_MIRSAMPLEPROFILE_H + +#include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" +#include "llvm/CodeGen/MachinePostDominators.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Module.h" +#include "llvm/InitializePasses.h" +#include "llvm/ProfileData/InstrProf.h" +#include "llvm/ProfileData/SampleProf.h" +#include "llvm/ProfileData/SampleProfReader.h" + +#include + +namespace llvm { + +using namespace sampleprof; + +class MIRProfileLoader; +class MIRProfileLoaderPass : public MachineFunctionPass { + MachineFunction *MF; + std::string ProfileFileName; + FSDiscriminatorPass P; + unsigned LowBit; + unsigned HighBit; + +public: + static char ID; + /// FS bits will only use the '1' bits in the Mask. 
+ MIRProfileLoaderPass(std::string FileName = "", + std::string RemappingFileName = "", + FSDiscriminatorPass P = FSDiscriminatorPass::Pass1); + + /// getMachineFunction - Return the last machine function computed. + const MachineFunction *getMachineFunction() const { return MF; } + + StringRef getPassName() const override { return "SampleFDO loader in MIR"; } + +private: + void init(MachineFunction &MF); + bool runOnMachineFunction(MachineFunction &) override; + bool doInitialization(Module &M) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; + + std::unique_ptr MIRSampleLoader; + /// Hold the information of the basic block frequency. + MachineBlockFrequencyInfo *MBFI; +}; + +} // namespace llvm + +#endif // LLVM_CODEGEN_MIRSAMPLEPROFILE_H diff --git a/llvm/include/llvm/CodeGen/MIRYamlMapping.h b/llvm/include/llvm/CodeGen/MIRYamlMapping.h index e7428e7ad260..b6d7c2487126 100644 --- a/llvm/include/llvm/CodeGen/MIRYamlMapping.h +++ b/llvm/include/llvm/CodeGen/MIRYamlMapping.h @@ -694,6 +694,7 @@ struct MachineFunction { // Register information bool TracksRegLiveness = false; bool HasWinCFI = false; + bool FailsVerification = false; std::vector VirtualRegisters; std::vector LiveIns; Optional> CalleeSavedRegisters; @@ -722,6 +723,7 @@ template <> struct MappingTraits { YamlIO.mapOptional("failedISel", MF.FailedISel, false); YamlIO.mapOptional("tracksRegLiveness", MF.TracksRegLiveness, false); YamlIO.mapOptional("hasWinCFI", MF.HasWinCFI, false); + YamlIO.mapOptional("failsVerification", MF.FailsVerification, false); YamlIO.mapOptional("registers", MF.VirtualRegisters, std::vector()); YamlIO.mapOptional("liveins", MF.LiveIns, diff --git a/llvm/include/llvm/CodeGen/MachineCombinerPattern.h b/llvm/include/llvm/CodeGen/MachineCombinerPattern.h index ac0cc70744d1..67544779f34c 100644 --- a/llvm/include/llvm/CodeGen/MachineCombinerPattern.h +++ b/llvm/include/llvm/CodeGen/MachineCombinerPattern.h @@ -153,7 +153,18 @@ enum class MachineCombinerPattern { FMLSv4f32_OP1, FMLSv4f32_OP2, FMLSv4i32_indexed_OP1, - FMLSv4i32_indexed_OP2 + FMLSv4i32_indexed_OP2, + + FMULv2i32_indexed_OP1, + FMULv2i32_indexed_OP2, + FMULv2i64_indexed_OP1, + FMULv2i64_indexed_OP2, + FMULv4i16_indexed_OP1, + FMULv4i16_indexed_OP2, + FMULv4i32_indexed_OP1, + FMULv4i32_indexed_OP2, + FMULv8i16_indexed_OP1, + FMULv8i16_indexed_OP2, }; } // end namespace llvm diff --git a/llvm/include/llvm/CodeGen/MachineDominators.h b/llvm/include/llvm/CodeGen/MachineDominators.h index 46bf73cdd7b6..f749e9ff7e0a 100644 --- a/llvm/include/llvm/CodeGen/MachineDominators.h +++ b/llvm/include/llvm/CodeGen/MachineDominators.h @@ -36,6 +36,7 @@ extern template class DomTreeNodeBase; extern template class DominatorTreeBase; // DomTree extern template class DominatorTreeBase; // PostDomTree +using MachineDomTree = DomTreeBase; using MachineDomTreeNode = DomTreeNodeBase; //===------------------------------------- @@ -43,8 +44,6 @@ using MachineDomTreeNode = DomTreeNodeBase; /// compute a normal dominator tree. /// class MachineDominatorTree : public MachineFunctionPass { - using DomTreeT = DomTreeBase; - /// Helper structure used to hold all the basic blocks /// involved in the split of a critical edge. struct CriticalEdge { @@ -67,7 +66,7 @@ class MachineDominatorTree : public MachineFunctionPass { mutable SmallSet NewBBs; /// The DominatorTreeBase that is used to compute a normal dominator tree. - std::unique_ptr DT; + std::unique_ptr DT; /// Apply all the recorded critical edges to the DT. 
/// This updates the underlying DT information in a way that uses @@ -84,8 +83,9 @@ public: calculate(MF); } - DomTreeT &getBase() { - if (!DT) DT.reset(new DomTreeT()); + MachineDomTree &getBase() { + if (!DT) + DT.reset(new MachineDomTree()); applySplitCriticalEdges(); return *DT; } @@ -112,6 +112,12 @@ public: return DT->dominates(A, B); } + void getDescendants(MachineBasicBlock *A, + SmallVectorImpl &Result) { + applySplitCriticalEdges(); + DT->getDescendants(A, Result); + } + bool dominates(const MachineBasicBlock *A, const MachineBasicBlock *B) const { applySplitCriticalEdges(); return DT->dominates(A, B); diff --git a/llvm/include/llvm/CodeGen/MachineFrameInfo.h b/llvm/include/llvm/CodeGen/MachineFrameInfo.h index 28a59703dc60..5df468102a8a 100644 --- a/llvm/include/llvm/CodeGen/MachineFrameInfo.h +++ b/llvm/include/llvm/CodeGen/MachineFrameInfo.h @@ -342,6 +342,8 @@ public: : StackAlignment(assumeAligned(StackAlignment)), StackRealignable(StackRealignable), ForcedRealign(ForcedRealign) {} + MachineFrameInfo(const MachineFrameInfo &) = delete; + /// Return true if there are any stack objects in this function. bool hasStackObjects() const { return !Objects.empty(); } diff --git a/llvm/include/llvm/CodeGen/MachineFunction.h b/llvm/include/llvm/CodeGen/MachineFunction.h index 786fe908f68f..dcbd19ac6b5a 100644 --- a/llvm/include/llvm/CodeGen/MachineFunction.h +++ b/llvm/include/llvm/CodeGen/MachineFunction.h @@ -149,6 +149,9 @@ public: // all sizes attached to them have been eliminated. // TiedOpsRewritten: The twoaddressinstruction pass will set this flag, it // means that tied-def have been rewritten to meet the RegConstraint. + // FailsVerification: Means that the function is not expected to pass machine + // verification. This can be set by passes that introduce known problems that + // have not been fixed yet. enum class Property : unsigned { IsSSA, NoPHIs, @@ -159,7 +162,8 @@ public: RegBankSelected, Selected, TiedOpsRewritten, - LastProperty = TiedOpsRewritten, + FailsVerification, + LastProperty = FailsVerification, }; bool hasProperty(Property P) const { @@ -227,7 +231,7 @@ struct LandingPadInfo { : LandingPadBlock(MBB) {} }; -class MachineFunction { +class LLVM_EXTERNAL_VISIBILITY MachineFunction { Function &F; const LLVMTargetMachine &Target; const TargetSubtargetInfo *STI; @@ -536,6 +540,14 @@ public: /// (or DBG_PHI). void finalizeDebugInstrRefs(); + /// Returns true if the function's variable locations should be tracked with + /// instruction referencing. + bool useDebugInstrRef() const; + + /// A reserved operand number representing the instructions memory operand, + /// for instructions that have a stack spill fused into them. 
+ const static unsigned int DebugOperandMemNumber; + MachineFunction(Function &F, const LLVMTargetMachine &Target, const TargetSubtargetInfo &STI, unsigned FunctionNum, MachineModuleInfo &MMI); diff --git a/llvm/include/llvm/CodeGen/MachineInstr.h b/llvm/include/llvm/CodeGen/MachineInstr.h index 757907f6d887..0ac934e208b6 100644 --- a/llvm/include/llvm/CodeGen/MachineInstr.h +++ b/llvm/include/llvm/CodeGen/MachineInstr.h @@ -517,7 +517,7 @@ public: SmallSet getUsedDebugRegs() const { assert(isDebugValue() && "not a DBG_VALUE*"); SmallSet UsedRegs; - for (auto MO : debug_operands()) + for (const auto &MO : debug_operands()) if (MO.isReg() && MO.getReg()) UsedRegs.insert(MO.getReg()); return UsedRegs; @@ -1331,6 +1331,7 @@ public: case TargetOpcode::LIFETIME_START: case TargetOpcode::LIFETIME_END: case TargetOpcode::PSEUDO_PROBE: + case TargetOpcode::ARITH_FENCE: return true; } } @@ -1859,17 +1860,6 @@ public: } } - PseudoProbeAttributes getPseudoProbeAttribute() const { - assert(isPseudoProbe() && "Must be a pseudo probe instruction"); - return (PseudoProbeAttributes)getOperand(3).getImm(); - } - - void addPseudoProbeAttribute(PseudoProbeAttributes Attr) { - assert(isPseudoProbe() && "Must be a pseudo probe instruction"); - MachineOperand &AttrOperand = getOperand(3); - AttrOperand.setImm(AttrOperand.getImm() | (uint32_t)Attr); - } - private: /// If this instruction is embedded into a MachineFunction, return the /// MachineRegisterInfo object for the current function, otherwise diff --git a/llvm/include/llvm/CodeGen/MachineMemOperand.h b/llvm/include/llvm/CodeGen/MachineMemOperand.h index 07b8e5ebcc1d..00080b171974 100644 --- a/llvm/include/llvm/CodeGen/MachineMemOperand.h +++ b/llvm/include/llvm/CodeGen/MachineMemOperand.h @@ -282,17 +282,7 @@ public: /// success and failure orderings for an atomic operation. (For operations /// other than cmpxchg, this is equivalent to getSuccessOrdering().) 
AtomicOrdering getMergedOrdering() const { - AtomicOrdering Ordering = getSuccessOrdering(); - AtomicOrdering FailureOrdering = getFailureOrdering(); - if (FailureOrdering == AtomicOrdering::SequentiallyConsistent) - return AtomicOrdering::SequentiallyConsistent; - if (FailureOrdering == AtomicOrdering::Acquire) { - if (Ordering == AtomicOrdering::Monotonic) - return AtomicOrdering::Acquire; - if (Ordering == AtomicOrdering::Release) - return AtomicOrdering::AcquireRelease; - } - return Ordering; + return getMergedAtomicOrdering(getSuccessOrdering(), getFailureOrdering()); } bool isLoad() const { return FlagVals & MOLoad; } diff --git a/llvm/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h b/llvm/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h index 8cc5909c40b7..285b858c96cb 100644 --- a/llvm/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h +++ b/llvm/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h @@ -118,6 +118,12 @@ public: : DiagnosticInfoMIROptimization(DK_MachineOptimizationRemarkAnalysis, PassName, RemarkName, Loc, MBB) {} + MachineOptimizationRemarkAnalysis(const char *PassName, StringRef RemarkName, + const MachineInstr *MI) + : DiagnosticInfoMIROptimization(DK_MachineOptimizationRemarkAnalysis, + PassName, RemarkName, MI->getDebugLoc(), + MI->getParent()) {} + static bool classof(const DiagnosticInfo *DI) { return DI->getKind() == DK_MachineOptimizationRemarkAnalysis; } diff --git a/llvm/include/llvm/CodeGen/MachineRegisterInfo.h b/llvm/include/llvm/CodeGen/MachineRegisterInfo.h index ca3dd992bbd5..dbabfe5f0f32 100644 --- a/llvm/include/llvm/CodeGen/MachineRegisterInfo.h +++ b/llvm/include/llvm/CodeGen/MachineRegisterInfo.h @@ -821,7 +821,7 @@ public: /// deleted during LiveDebugVariables analysis. void markUsesInDebugValueAsUndef(Register Reg) const; - /// updateDbgUsersToReg - Update a collection of DBG_VALUE instructions + /// updateDbgUsersToReg - Update a collection of debug instructions /// to refer to the designated register. void updateDbgUsersToReg(MCRegister OldReg, MCRegister NewReg, ArrayRef Users) const { @@ -829,21 +829,34 @@ public: for (MCRegUnitIterator RUI(OldReg, getTargetRegisterInfo()); RUI.isValid(); ++RUI) OldRegUnits.insert(*RUI); - for (MachineInstr *MI : Users) { - assert(MI->isDebugValue()); - for (auto &Op : MI->debug_operands()) { - if (Op.isReg()) { - for (MCRegUnitIterator RUI(OldReg, getTargetRegisterInfo()); - RUI.isValid(); ++RUI) { - if (OldRegUnits.contains(*RUI)) { - Op.setReg(NewReg); - break; - } + + // If this operand is a register, check whether it overlaps with OldReg. + // If it does, replace with NewReg. + auto UpdateOp = [this, &NewReg, &OldReg, &OldRegUnits](MachineOperand &Op) { + if (Op.isReg()) { + for (MCRegUnitIterator RUI(OldReg, getTargetRegisterInfo()); + RUI.isValid(); ++RUI) { + if (OldRegUnits.contains(*RUI)) { + Op.setReg(NewReg); + break; } } } - assert(MI->hasDebugOperandForReg(NewReg) && - "Expected debug value to have some overlap with OldReg"); + }; + + // Iterate through (possibly several) operands to DBG_VALUEs and update + // each. For DBG_PHIs, only one operand will be present. 
+ for (MachineInstr *MI : Users) { + if (MI->isDebugValue()) { + for (auto &Op : MI->debug_operands()) + UpdateOp(Op); + assert(MI->hasDebugOperandForReg(NewReg) && + "Expected debug value to have some overlap with OldReg"); + } else if (MI->isDebugPHI()) { + UpdateOp(MI->getOperand(0)); + } else { + llvm_unreachable("Non-DBG_VALUE, Non-DBG_PHI debug instr updated"); + } } } @@ -964,7 +977,7 @@ public: MCRegister getLiveInPhysReg(Register VReg) const; /// getLiveInVirtReg - If PReg is a live-in physical register, return the - /// corresponding live-in physical register. + /// corresponding live-in virtual register. Register getLiveInVirtReg(MCRegister PReg) const; /// EmitLiveInCopies - Emit copies to initialize livein virtual registers diff --git a/llvm/include/llvm/CodeGen/MacroFusion.h b/llvm/include/llvm/CodeGen/MacroFusion.h index 3a140fe63fde..ea2c7a5faae3 100644 --- a/llvm/include/llvm/CodeGen/MacroFusion.h +++ b/llvm/include/llvm/CodeGen/MacroFusion.h @@ -23,6 +23,8 @@ class MachineInstr; class ScheduleDAGMutation; class TargetInstrInfo; class TargetSubtargetInfo; +class ScheduleDAGInstrs; +class SUnit; /// Check if the instr pair, FirstMI and SecondMI, should be fused /// together. Given SecondMI, when FirstMI is unspecified, then check if @@ -32,6 +34,18 @@ using ShouldSchedulePredTy = std::function; +/// Checks if the number of cluster edges between SU and its predecessors is +/// less than FuseLimit +bool hasLessThanNumFused(const SUnit &SU, unsigned FuseLimit); + +/// Create an artificial edge between FirstSU and SecondSU. +/// Make data dependencies from the FirstSU also dependent on the SecondSU to +/// prevent them from being scheduled between the FirstSU and the SecondSU +/// and vice-versa. +/// Fusing more than 2 instructions is not currently supported. +bool fuseInstructionPair(ScheduleDAGInstrs &DAG, SUnit &FirstSU, + SUnit &SecondSU); + /// Create a DAG scheduling mutation to pair instructions back to back /// for instructions that benefit according to the target-specific /// shouldScheduleAdjacent predicate function. diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h index da1bab718948..d5ad12fadfa0 100644 --- a/llvm/include/llvm/CodeGen/Passes.h +++ b/llvm/include/llvm/CodeGen/Passes.h @@ -37,6 +37,10 @@ class raw_ostream; // List of target independent CodeGen pass IDs. namespace llvm { + + /// AtomicExpandPass - At IR level this pass replace atomic instructions with + /// __atomic_* library calls, or target specific instruction which implement the + /// same semantics in a way which better fits the target backend. FunctionPass *createAtomicExpandPass(); /// createUnreachableBlockEliminationPass - The LLVM code generator does not @@ -171,6 +175,9 @@ namespace llvm { /// This pass adds flow sensitive discriminators. extern char &MIRAddFSDiscriminatorsID; + /// This pass reads flow sensitive profile. + extern char &MIRProfileLoaderPassID; + /// FastRegisterAllocation Pass - This pass register allocates as fast as /// possible. It is best suited for debug code where live ranges are short. /// @@ -513,6 +520,11 @@ namespace llvm { FunctionPass * createMIRAddFSDiscriminatorsPass(sampleprof::FSDiscriminatorPass P); + /// Read Flow Sensitive Profile. + FunctionPass *createMIRProfileLoaderPass(std::string File, + std::string RemappingFile, + sampleprof::FSDiscriminatorPass P); + /// Creates MIR Debugify pass. 
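As an aside on the flow-sensitive SampleFDO hooks declared in this hunk (createMIRAddFSDiscriminatorsPass and createMIRProfileLoaderPass), the two passes are meant to be scheduled together. The sketch below is illustrative only and not part of the patch: the derived pass-config class, the hook name, and the profile file names are placeholders, and FSDiscriminatorPass::Pass1 is simply the phase used as a default elsewhere in this import.

#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/Support/Discriminator.h"

class MyTargetPassConfig : public llvm::TargetPassConfig {
  // Constructor and the usual overrides are elided; only the profile
  // wiring is shown.
  void addFSProfilePasses() {
    using namespace llvm;
    // Discriminators must be present before the loader tries to match the
    // profile against MIR.
    addPass(createMIRAddFSDiscriminatorsPass(
        sampleprof::FSDiscriminatorPass::Pass1));
    // Read the flow-sensitive profile; an empty remapping file name means
    // no symbol remapping is applied.
    addPass(createMIRProfileLoaderPass("fs.afdo", "",
                                       sampleprof::FSDiscriminatorPass::Pass1));
  }
};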
\see MachineDebugify.cpp ModulePass *createDebugifyMachineModulePass(); diff --git a/llvm/include/llvm/CodeGen/RegAllocCommon.h b/llvm/include/llvm/CodeGen/RegAllocCommon.h index 39b77d919370..757ca8e112ee 100644 --- a/llvm/include/llvm/CodeGen/RegAllocCommon.h +++ b/llvm/include/llvm/CodeGen/RegAllocCommon.h @@ -1,9 +1,8 @@ //===- RegAllocCommon.h - Utilities shared between allocators ---*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/llvm/include/llvm/CodeGen/RegisterScavenging.h b/llvm/include/llvm/CodeGen/RegisterScavenging.h index 4f48ea2dc8e8..218e05f6eb6b 100644 --- a/llvm/include/llvm/CodeGen/RegisterScavenging.h +++ b/llvm/include/llvm/CodeGen/RegisterScavenging.h @@ -211,9 +211,6 @@ private: /// Initialize RegisterScavenger. void init(MachineBasicBlock &MBB); - /// Mark live-in registers of basic block as used. - void setLiveInsUsed(const MachineBasicBlock &MBB); - /// Spill a register after position \p After and reload it before position /// \p UseMI. ScavengedInfo &spill(Register Reg, const TargetRegisterClass &RC, int SPAdj, diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h index 948a4763b872..5a3f4e9a23ff 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAG.h +++ b/llvm/include/llvm/CodeGen/SelectionDAG.h @@ -531,7 +531,7 @@ public: } #ifndef NDEBUG - void VerifyDAGDiverence(); + void VerifyDAGDivergence(); #endif /// This iterates over the nodes in the SelectionDAG, folding @@ -621,8 +621,8 @@ public: SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget = false, bool IsOpaque = false) { - return getConstant(APInt::getAllOnesValue(VT.getScalarSizeInBits()), DL, - VT, IsTarget, IsOpaque); + return getConstant(APInt::getAllOnes(VT.getScalarSizeInBits()), DL, VT, + IsTarget, IsOpaque); } SDValue getConstant(const ConstantInt &Val, const SDLoc &DL, EVT VT, @@ -1307,6 +1307,74 @@ public: SDValue getIndexedStore(SDValue OrigStore, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM); + SDValue getLoadVP(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, + const SDLoc &dl, SDValue Chain, SDValue Ptr, SDValue Offset, + SDValue Mask, SDValue EVL, MachinePointerInfo PtrInfo, + EVT MemVT, Align Alignment, + MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo, + const MDNode *Ranges = nullptr, bool IsExpanding = false); + inline SDValue + getLoadVP(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, + const SDLoc &dl, SDValue Chain, SDValue Ptr, SDValue Offset, + SDValue Mask, SDValue EVL, MachinePointerInfo PtrInfo, EVT MemVT, + MaybeAlign Alignment = MaybeAlign(), + MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone, + const AAMDNodes &AAInfo = AAMDNodes(), + const MDNode *Ranges = nullptr, bool IsExpanding = false) { + // Ensures that codegen never sees a None Alignment. 
+ return getLoadVP(AM, ExtType, VT, dl, Chain, Ptr, Offset, Mask, EVL, + PtrInfo, MemVT, Alignment.getValueOr(getEVTAlign(MemVT)), + MMOFlags, AAInfo, Ranges, IsExpanding); + } + SDValue getLoadVP(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, + const SDLoc &dl, SDValue Chain, SDValue Ptr, SDValue Offset, + SDValue Mask, SDValue EVL, EVT MemVT, + MachineMemOperand *MMO, bool IsExpanding = false); + SDValue getLoadVP(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, + SDValue Mask, SDValue EVL, MachinePointerInfo PtrInfo, + MaybeAlign Alignment, MachineMemOperand::Flags MMOFlags, + const AAMDNodes &AAInfo, const MDNode *Ranges = nullptr, + bool IsExpanding = false); + SDValue getLoadVP(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, + SDValue Mask, SDValue EVL, MachineMemOperand *MMO, + bool IsExpanding = false); + SDValue getExtLoadVP(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, + SDValue Chain, SDValue Ptr, SDValue Mask, SDValue EVL, + MachinePointerInfo PtrInfo, EVT MemVT, + MaybeAlign Alignment, MachineMemOperand::Flags MMOFlags, + const AAMDNodes &AAInfo, bool IsExpanding = false); + SDValue getExtLoadVP(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, + SDValue Chain, SDValue Ptr, SDValue Mask, SDValue EVL, + EVT MemVT, MachineMemOperand *MMO, + bool IsExpanding = false); + SDValue getIndexedLoadVP(SDValue OrigLoad, const SDLoc &dl, SDValue Base, + SDValue Offset, ISD::MemIndexedMode AM); + SDValue getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, + SDValue Mask, SDValue EVL, MachinePointerInfo PtrInfo, + Align Alignment, MachineMemOperand::Flags MMOFlags, + const AAMDNodes &AAInfo = AAMDNodes(), + bool IsCompressing = false); + SDValue getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, + SDValue Mask, SDValue EVL, MachineMemOperand *MMO, + bool IsCompressing = false); + SDValue getTruncStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, + SDValue Ptr, SDValue Mask, SDValue EVL, + MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, + MachineMemOperand::Flags MMOFlags, + const AAMDNodes &AAInfo, bool IsCompressing = false); + SDValue getTruncStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, + SDValue Ptr, SDValue Mask, SDValue EVL, EVT SVT, + MachineMemOperand *MMO, bool IsCompressing = false); + SDValue getIndexedStoreVP(SDValue OrigStore, const SDLoc &dl, SDValue Base, + SDValue Offset, ISD::MemIndexedMode AM); + + SDValue getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl, + ArrayRef Ops, MachineMemOperand *MMO, + ISD::MemIndexType IndexType); + SDValue getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl, + ArrayRef Ops, MachineMemOperand *MMO, + ISD::MemIndexType IndexType); + SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, @@ -1664,10 +1732,6 @@ public: SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef Ops); - SDValue FoldConstantVectorArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, - ArrayRef Ops, - const SDNodeFlags Flags = SDNodeFlags()); - /// Fold floating-point operations with 2 operands when both operands are /// constants and/or undefined. SDValue foldConstantFPMath(unsigned Opcode, const SDLoc &DL, EVT VT, @@ -1769,6 +1833,19 @@ public: unsigned ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, unsigned Depth = 0) const; + /// Get the minimum bit size for this Value \p Op as a signed integer. + /// i.e. 
x == sext(trunc(x to MinSignedBits) to bitwidth(x)). + /// Similar to the APInt::getMinSignedBits function. + /// Helper wrapper to ComputeNumSignBits. + unsigned ComputeMinSignedBits(SDValue Op, unsigned Depth = 0) const; + + /// Get the minimum bit size for this Value \p Op as a signed integer. + /// i.e. x == sext(trunc(x to MinSignedBits) to bitwidth(x)). + /// Similar to the APInt::getMinSignedBits function. + /// Helper wrapper to ComputeNumSignBits. + unsigned ComputeMinSignedBits(SDValue Op, const APInt &DemandedElts, + unsigned Depth = 0) const; + /// Return true if this function can prove that \p Op is never poison /// and, if \p PoisonOnly is false, does not have undef bits. bool isGuaranteedNotToBeUndefOrPoison(SDValue Op, bool PoisonOnly = false, diff --git a/llvm/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h b/llvm/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h index 4ee58333495b..6a3d76be0ed6 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h @@ -50,6 +50,7 @@ public: SDValue getIndex() { return Index; } SDValue getIndex() const { return Index; } bool hasValidOffset() const { return Offset.hasValue(); } + int64_t getOffset() const { return *Offset; } // Returns true if `Other` and `*this` are both some offset from the same base // pointer. In that case, `Off` is set to the offset between `*this` and diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h index deeca98af3f3..2855e1f1e587 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h @@ -58,7 +58,6 @@ namespace llvm { class APInt; class Constant; -template struct DenseMapInfo; class GlobalValue; class MachineBasicBlock; class MachineConstantPoolValue; @@ -509,15 +508,19 @@ BEGIN_TWO_BYTE_PACK() class LSBaseSDNodeBitfields { friend class LSBaseSDNode; + friend class VPLoadStoreSDNode; friend class MaskedLoadStoreSDNode; friend class MaskedGatherScatterSDNode; + friend class VPGatherScatterSDNode; uint16_t : NumMemSDNodeBits; // This storage is shared between disparate class hierarchies to hold an // enumeration specific to the class hierarchy in use. 
// LSBaseSDNode => enum ISD::MemIndexedMode + // VPLoadStoreBaseSDNode => enum ISD::MemIndexedMode // MaskedLoadStoreBaseSDNode => enum ISD::MemIndexedMode + // VPGatherScatterSDNode => enum ISD::MemIndexType // MaskedGatherScatterSDNode => enum ISD::MemIndexType uint16_t AddressingMode : 3; }; @@ -525,8 +528,10 @@ BEGIN_TWO_BYTE_PACK() class LoadSDNodeBitfields { friend class LoadSDNode; + friend class VPLoadSDNode; friend class MaskedLoadSDNode; friend class MaskedGatherSDNode; + friend class VPGatherSDNode; uint16_t : NumLSBaseSDNodeBits; @@ -536,8 +541,10 @@ BEGIN_TWO_BYTE_PACK() class StoreSDNodeBitfields { friend class StoreSDNode; + friend class VPStoreSDNode; friend class MaskedStoreSDNode; friend class MaskedScatterSDNode; + friend class VPScatterSDNode; uint16_t : NumLSBaseSDNodeBits; @@ -1353,7 +1360,9 @@ public: const SDValue &getBasePtr() const { switch (getOpcode()) { case ISD::STORE: + case ISD::VP_STORE: case ISD::MSTORE: + case ISD::VP_SCATTER: return getOperand(2); case ISD::MGATHER: case ISD::MSCATTER: @@ -1393,6 +1402,10 @@ public: case ISD::MSTORE: case ISD::MGATHER: case ISD::MSCATTER: + case ISD::VP_LOAD: + case ISD::VP_STORE: + case ISD::VP_GATHER: + case ISD::VP_SCATTER: return true; default: return N->isMemIntrinsic() || N->isTargetMemoryOpcode(); @@ -1563,8 +1576,12 @@ public: Align getAlignValue() const { return Value->getAlignValue(); } bool isOne() const { return Value->isOne(); } - bool isNullValue() const { return Value->isZero(); } - bool isAllOnesValue() const { return Value->isMinusOne(); } + bool isZero() const { return Value->isZero(); } + // NOTE: This is soft-deprecated. Please use `isZero()` instead. + bool isNullValue() const { return isZero(); } + bool isAllOnes() const { return Value->isMinusOne(); } + // NOTE: This is soft-deprecated. Please use `isAllOnes()` instead. + bool isAllOnesValue() const { return isAllOnes(); } bool isMaxSignedValue() const { return Value->isMaxValue(true); } bool isMinSignedValue() const { return Value->isMinValue(true); } @@ -2031,8 +2048,25 @@ public: int32_t getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements, uint32_t BitWidth) const; + /// Extract the raw bit data from a build vector of Undef, Constant or + /// ConstantFP node elements. Each raw bit element will be \p + /// DstEltSizeInBits wide, undef elements are treated as zero, and entirely + /// undefined elements are flagged in \p UndefElements. + bool getConstantRawBits(bool IsLittleEndian, unsigned DstEltSizeInBits, + SmallVectorImpl &RawBitElements, + BitVector &UndefElements) const; + bool isConstant() const; + /// Recast bit data \p SrcBitElements to \p DstEltSizeInBits wide elements. + /// Undef elements are treated as zero, and entirely undefined elements are + /// flagged in \p DstUndefElements. 
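On the ConstantSDNode predicates renamed earlier in this hunk: the old spellings remain as soft-deprecated aliases, and new code is expected to use the short names. A trivial sketch, with the helper name invented purely for illustration:

#include "llvm/CodeGen/SelectionDAGNodes.h"

// isNullValue() and isAllOnesValue() would still compile, but they are now
// soft-deprecated aliases of isZero() and isAllOnes().
static bool isZeroOrAllOnes(const llvm::ConstantSDNode *N) {
  return N->isZero() || N->isAllOnes();
}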
+ static void recastRawBits(bool IsLittleEndian, unsigned DstEltSizeInBits, + SmallVectorImpl &DstBitElements, + ArrayRef SrcBitElements, + BitVector &DstUndefElements, + const BitVector &SrcUndefElements); + static bool classof(const SDNode *N) { return N->getOpcode() == ISD::BUILD_VECTOR; } @@ -2318,6 +2352,116 @@ public: } }; +/// This base class is used to represent VP_LOAD and VP_STORE nodes +class VPLoadStoreSDNode : public MemSDNode { +public: + friend class SelectionDAG; + + VPLoadStoreSDNode(ISD::NodeType NodeTy, unsigned Order, const DebugLoc &dl, + SDVTList VTs, ISD::MemIndexedMode AM, EVT MemVT, + MachineMemOperand *MMO) + : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) { + LSBaseSDNodeBits.AddressingMode = AM; + assert(getAddressingMode() == AM && "Value truncated"); + } + + // VPLoadSDNode (Chain, Ptr, Offset, Mask, EVL) + // VPStoreSDNode (Chain, Data, Ptr, Offset, Mask, EVL) + // Mask is a vector of i1 elements; + // the type of EVL is TLI.getVPExplicitVectorLengthTy(). + const SDValue &getOffset() const { + return getOperand(getOpcode() == ISD::VP_LOAD ? 2 : 3); + } + const SDValue &getBasePtr() const { + return getOperand(getOpcode() == ISD::VP_LOAD ? 1 : 2); + } + const SDValue &getMask() const { + return getOperand(getOpcode() == ISD::VP_LOAD ? 3 : 4); + } + const SDValue &getVectorLength() const { + return getOperand(getOpcode() == ISD::VP_LOAD ? 4 : 5); + } + + /// Return the addressing mode for this load or store: + /// unindexed, pre-inc, pre-dec, post-inc, or post-dec. + ISD::MemIndexedMode getAddressingMode() const { + return static_cast(LSBaseSDNodeBits.AddressingMode); + } + + /// Return true if this is a pre/post inc/dec load/store. + bool isIndexed() const { return getAddressingMode() != ISD::UNINDEXED; } + + /// Return true if this is NOT a pre/post inc/dec load/store. + bool isUnindexed() const { return getAddressingMode() == ISD::UNINDEXED; } + + static bool classof(const SDNode *N) { + return N->getOpcode() == ISD::VP_LOAD || N->getOpcode() == ISD::VP_STORE; + } +}; + +/// This class is used to represent a VP_LOAD node +class VPLoadSDNode : public VPLoadStoreSDNode { +public: + friend class SelectionDAG; + + VPLoadSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs, + ISD::MemIndexedMode AM, ISD::LoadExtType ETy, bool isExpanding, + EVT MemVT, MachineMemOperand *MMO) + : VPLoadStoreSDNode(ISD::VP_LOAD, Order, dl, VTs, AM, MemVT, MMO) { + LoadSDNodeBits.ExtTy = ETy; + LoadSDNodeBits.IsExpanding = isExpanding; + } + + ISD::LoadExtType getExtensionType() const { + return static_cast(LoadSDNodeBits.ExtTy); + } + + const SDValue &getBasePtr() const { return getOperand(1); } + const SDValue &getOffset() const { return getOperand(2); } + const SDValue &getMask() const { return getOperand(3); } + const SDValue &getVectorLength() const { return getOperand(4); } + + static bool classof(const SDNode *N) { + return N->getOpcode() == ISD::VP_LOAD; + } + bool isExpandingLoad() const { return LoadSDNodeBits.IsExpanding; } +}; + +/// This class is used to represent a VP_STORE node +class VPStoreSDNode : public VPLoadStoreSDNode { +public: + friend class SelectionDAG; + + VPStoreSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs, + ISD::MemIndexedMode AM, bool isTrunc, bool isCompressing, + EVT MemVT, MachineMemOperand *MMO) + : VPLoadStoreSDNode(ISD::VP_STORE, Order, dl, VTs, AM, MemVT, MMO) { + StoreSDNodeBits.IsTruncating = isTrunc; + StoreSDNodeBits.IsCompressing = isCompressing; + } + + /// Return true if this is a truncating store. 
+ /// For integers this is the same as doing a TRUNCATE and storing the result. + /// For floats, it is the same as doing an FP_ROUND and storing the result. + bool isTruncatingStore() const { return StoreSDNodeBits.IsTruncating; } + + /// Returns true if the op does a compression to the vector before storing. + /// The node contiguously stores the active elements (integers or floats) + /// in src (those with their respective bit set in writemask k) to unaligned + /// memory at base_addr. + bool isCompressingStore() const { return StoreSDNodeBits.IsCompressing; } + + const SDValue &getValue() const { return getOperand(1); } + const SDValue &getBasePtr() const { return getOperand(2); } + const SDValue &getOffset() const { return getOperand(3); } + const SDValue &getMask() const { return getOperand(4); } + const SDValue &getVectorLength() const { return getOperand(5); } + + static bool classof(const SDNode *N) { + return N->getOpcode() == ISD::VP_STORE; + } +}; + /// This base class is used to represent MLOAD and MSTORE nodes class MaskedLoadStoreSDNode : public MemSDNode { public: @@ -2423,6 +2567,94 @@ public: } }; +/// This is a base class used to represent +/// VP_GATHER and VP_SCATTER nodes +/// +class VPGatherScatterSDNode : public MemSDNode { +public: + friend class SelectionDAG; + + VPGatherScatterSDNode(ISD::NodeType NodeTy, unsigned Order, + const DebugLoc &dl, SDVTList VTs, EVT MemVT, + MachineMemOperand *MMO, ISD::MemIndexType IndexType) + : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) { + LSBaseSDNodeBits.AddressingMode = IndexType; + assert(getIndexType() == IndexType && "Value truncated"); + } + + /// How is Index applied to BasePtr when computing addresses. + ISD::MemIndexType getIndexType() const { + return static_cast(LSBaseSDNodeBits.AddressingMode); + } + bool isIndexScaled() const { + return (getIndexType() == ISD::SIGNED_SCALED) || + (getIndexType() == ISD::UNSIGNED_SCALED); + } + bool isIndexSigned() const { + return (getIndexType() == ISD::SIGNED_SCALED) || + (getIndexType() == ISD::SIGNED_UNSCALED); + } + + // In the both nodes address is Op1, mask is Op2: + // VPGatherSDNode (Chain, base, index, scale, mask, vlen) + // VPScatterSDNode (Chain, value, base, index, scale, mask, vlen) + // Mask is a vector of i1 elements + const SDValue &getBasePtr() const { + return getOperand((getOpcode() == ISD::VP_GATHER) ? 1 : 2); + } + const SDValue &getIndex() const { + return getOperand((getOpcode() == ISD::VP_GATHER) ? 2 : 3); + } + const SDValue &getScale() const { + return getOperand((getOpcode() == ISD::VP_GATHER) ? 3 : 4); + } + const SDValue &getMask() const { + return getOperand((getOpcode() == ISD::VP_GATHER) ? 4 : 5); + } + const SDValue &getVectorLength() const { + return getOperand((getOpcode() == ISD::VP_GATHER) ? 
5 : 6); + } + + static bool classof(const SDNode *N) { + return N->getOpcode() == ISD::VP_GATHER || + N->getOpcode() == ISD::VP_SCATTER; + } +}; + +/// This class is used to represent an VP_GATHER node +/// +class VPGatherSDNode : public VPGatherScatterSDNode { +public: + friend class SelectionDAG; + + VPGatherSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs, EVT MemVT, + MachineMemOperand *MMO, ISD::MemIndexType IndexType) + : VPGatherScatterSDNode(ISD::VP_GATHER, Order, dl, VTs, MemVT, MMO, + IndexType) {} + + static bool classof(const SDNode *N) { + return N->getOpcode() == ISD::VP_GATHER; + } +}; + +/// This class is used to represent an VP_SCATTER node +/// +class VPScatterSDNode : public VPGatherScatterSDNode { +public: + friend class SelectionDAG; + + VPScatterSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs, EVT MemVT, + MachineMemOperand *MMO, ISD::MemIndexType IndexType) + : VPGatherScatterSDNode(ISD::VP_SCATTER, Order, dl, VTs, MemVT, MMO, + IndexType) {} + + const SDValue &getValue() const { return getOperand(1); } + + static bool classof(const SDNode *N) { + return N->getOpcode() == ISD::VP_SCATTER; + } +}; + /// This is a base class used to represent /// MGATHER and MSCATTER nodes /// diff --git a/llvm/include/llvm/CodeGen/SwitchLoweringUtils.h b/llvm/include/llvm/CodeGen/SwitchLoweringUtils.h index 51f1d7d6fd21..bc22d7789856 100644 --- a/llvm/include/llvm/CodeGen/SwitchLoweringUtils.h +++ b/llvm/include/llvm/CodeGen/SwitchLoweringUtils.h @@ -183,12 +183,12 @@ struct JumpTableHeader { const Value *SValue; MachineBasicBlock *HeaderBB; bool Emitted; - bool OmitRangeCheck; + bool FallthroughUnreachable; JumpTableHeader(APInt F, APInt L, const Value *SV, MachineBasicBlock *H, bool E = false) : First(std::move(F)), Last(std::move(L)), SValue(SV), HeaderBB(H), - Emitted(E), OmitRangeCheck(false) {} + Emitted(E), FallthroughUnreachable(false) {} }; using JumpTableBlock = std::pair; @@ -218,14 +218,14 @@ struct BitTestBlock { BitTestInfo Cases; BranchProbability Prob; BranchProbability DefaultProb; - bool OmitRangeCheck; + bool FallthroughUnreachable; BitTestBlock(APInt F, APInt R, const Value *SV, unsigned Rg, MVT RgVT, bool E, bool CR, MachineBasicBlock *P, MachineBasicBlock *D, BitTestInfo C, BranchProbability Pr) : First(std::move(F)), Range(std::move(R)), SValue(SV), Reg(Rg), RegVT(RgVT), Emitted(E), ContiguousRange(CR), Parent(P), Default(D), - Cases(std::move(C)), Prob(Pr), OmitRangeCheck(false) {} + Cases(std::move(C)), Prob(Pr), FallthroughUnreachable(false) {} }; /// Return the range of values within a range. 
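To show how the VP memory builders and the new node classes in this file fit together, here is a minimal sketch (not from the patch). The chain, pointer, mask, EVL and memory operand are assumed to be produced by the surrounding lowering code, and only the simplest unindexed, non-extending getLoadVP overload is used.

#include <cassert>
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"

static llvm::SDValue emitSimpleVPLoad(llvm::SelectionDAG &DAG,
                                      const llvm::SDLoc &DL, llvm::EVT VT,
                                      llvm::SDValue Chain, llvm::SDValue Ptr,
                                      llvm::SDValue Mask, llvm::SDValue EVL,
                                      llvm::MachineMemOperand *MMO) {
  using namespace llvm;
  // Build an unindexed, non-expanding VP load of type VT.
  SDValue Load = DAG.getLoadVP(VT, DL, Chain, Ptr, Mask, EVL, MMO);
  // The result can be inspected through the new VPLoadSDNode accessors.
  auto *N = cast<VPLoadSDNode>(Load.getNode());
  assert(N->isUnindexed() && "simple overload builds an unindexed load");
  (void)N;
  return Load;
}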
diff --git a/llvm/include/llvm/CodeGen/TargetCallingConv.h b/llvm/include/llvm/CodeGen/TargetCallingConv.h index 29e644898f6b..7713dd0800c0 100644 --- a/llvm/include/llvm/CodeGen/TargetCallingConv.h +++ b/llvm/include/llvm/CodeGen/TargetCallingConv.h @@ -247,11 +247,11 @@ namespace ISD { unsigned PartOffset; OutputArg() = default; - OutputArg(ArgFlagsTy flags, EVT vt, EVT argvt, bool isfixed, + OutputArg(ArgFlagsTy flags, MVT vt, EVT argvt, bool isfixed, unsigned origIdx, unsigned partOffs) - : Flags(flags), IsFixed(isfixed), OrigArgIndex(origIdx), - PartOffset(partOffs) { - VT = vt.getSimpleVT(); + : Flags(flags), IsFixed(isfixed), OrigArgIndex(origIdx), + PartOffset(partOffs) { + VT = vt; ArgVT = argvt; } }; diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h index 05d0591f1e5d..8bc730a3eda5 100644 --- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h +++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h @@ -411,9 +411,12 @@ public: /// This method returns a null pointer if the transformation cannot be /// performed, otherwise it returns the last new instruction. /// - virtual MachineInstr *convertToThreeAddress(MachineFunction::iterator &MFI, - MachineInstr &MI, - LiveVariables *LV) const { + /// If \p LIS is not nullptr, the LiveIntervals info should be updated for + /// replacing \p MI with new instructions, even though this function does not + /// remove MI. + virtual MachineInstr *convertToThreeAddress(MachineInstr &MI, + LiveVariables *LV, + LiveIntervals *LIS) const { return nullptr; } @@ -583,15 +586,14 @@ public: } /// Insert an unconditional indirect branch at the end of \p MBB to \p - /// NewDestBB. \p BrOffset indicates the offset of \p NewDestBB relative to + /// NewDestBB. Optionally, insert the clobbered register restoring in \p + /// RestoreBB. \p BrOffset indicates the offset of \p NewDestBB relative to /// the offset of the position to insert the new branch. - /// - /// \returns The number of bytes added to the block. - virtual unsigned insertIndirectBranch(MachineBasicBlock &MBB, - MachineBasicBlock &NewDestBB, - const DebugLoc &DL, - int64_t BrOffset = 0, - RegScavenger *RS = nullptr) const { + virtual void insertIndirectBranch(MachineBasicBlock &MBB, + MachineBasicBlock &NewDestBB, + MachineBasicBlock &RestoreBB, + const DebugLoc &DL, int64_t BrOffset = 0, + RegScavenger *RS = nullptr) const { llvm_unreachable("target did not implement"); } @@ -1537,7 +1539,8 @@ public: /// compares against in CmpValue. Return true if the comparison instruction /// can be analyzed. virtual bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, - Register &SrcReg2, int &Mask, int &Value) const { + Register &SrcReg2, int64_t &Mask, + int64_t &Value) const { return false; } @@ -1545,7 +1548,8 @@ public: /// into something more efficient. E.g., on ARM most instructions can set the /// flags register, obviating the need for a separate CMP. virtual bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, - Register SrcReg2, int Mask, int Value, + Register SrcReg2, int64_t Mask, + int64_t Value, const MachineRegisterInfo *MRI) const { return false; } @@ -1624,9 +1628,6 @@ public: unsigned defaultDefLatency(const MCSchedModel &SchedModel, const MachineInstr &DefMI) const; - int computeDefOperandLatency(const InstrItineraryData *ItinData, - const MachineInstr &DefMI) const; - /// Return true if this opcode has high latency to its result. 
virtual bool isHighLatencyDef(int opc) const { return false; } diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index 692dc4d7d4cf..87f5168ec48f 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -30,6 +30,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/DAGCombine.h" #include "llvm/CodeGen/ISDOpcodes.h" +#include "llvm/CodeGen/LowLevelType.h" #include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" @@ -371,10 +372,18 @@ public: return getPointerTy(DL); } - /// EVT is not used in-tree, but is used by out-of-tree target. - /// A documentation for this function would be nice... + /// Return the type to use for a scalar shift opcode, given the shifted amount + /// type. Targets should return a legal type if the input type is legal. + /// Targets can return a type that is too small if the input type is illegal. virtual MVT getScalarShiftAmountTy(const DataLayout &, EVT) const; + /// Returns the type for the shift amount of a shift opcode. For vectors, + /// returns the input type. For scalars, behavior depends on \p LegalTypes. If + /// \p LegalTypes is true, calls getScalarShiftAmountTy, otherwise uses + /// pointer type. If getScalarShiftAmountTy or pointer type cannot represent + /// all possible shift amounts, returns MVT::i32. In general, \p LegalTypes + /// should be set to true for calls during type legalization and after type + /// legalization has been completed. EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL, bool LegalTypes = true) const; @@ -591,7 +600,7 @@ public: /// Returns if it's reasonable to merge stores to MemVT size. virtual bool canMergeStoresTo(unsigned AS, EVT MemVT, - const SelectionDAG &DAG) const { + const MachineFunction &MF) const { return true; } @@ -1396,6 +1405,11 @@ public: return NVT; } + virtual EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty, + bool AllowUnknown = false) const { + return getValueType(DL, Ty, AllowUnknown); + } + /// Return the EVT corresponding to this LLVM type. This is fixed by the LLVM /// operations except for the pointer size. If AllowUnknown is true, this /// will return MVT::Other for types with no EVT counterpart (e.g. structs), @@ -1448,7 +1462,7 @@ public: /// Return the desired alignment for ByVal or InAlloca aggregate function /// arguments in the caller parameter area. This is the actual alignment, not /// its logarithm. - virtual unsigned getByValTypeAlignment(Type *Ty, const DataLayout &DL) const; + virtual uint64_t getByValTypeAlignment(Type *Ty, const DataLayout &DL) const; /// Return the type of registers that this ValueType will eventually require. MVT getRegisterType(MVT VT) const { @@ -1763,9 +1777,7 @@ public: Align getPrefFunctionAlignment() const { return PrefFunctionAlignment; } /// Return the preferred loop alignment. - virtual Align getPrefLoopAlignment(MachineLoop *ML = nullptr) const { - return PrefLoopAlignment; - } + virtual Align getPrefLoopAlignment(MachineLoop *ML = nullptr) const; /// Should loops be aligned even when the function is marked OptSize (but not /// MinSize). @@ -2077,6 +2089,20 @@ public: return false; } + /// Return true if it may be profitable to transform + /// (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2). + /// This may not be true if c1 and c2 can be represented as immediates but + /// c1*c2 cannot, for example. 
+ /// The target should check if c1, c2 and c1*c2 can be represented as + /// immediates, or have to be materialized into registers. If it is not sure + /// about some cases, a default true can be returned to let the DAGCombiner + /// decide. + /// AddNode is (add x, c1), and ConstNode is c2. + virtual bool isMulAddWithConstProfitable(const SDValue &AddNode, + const SDValue &ConstNode) const { + return true; + } + /// Return true if it is more correct/profitable to use strict FP_TO_INT /// conversion operations - canonicalizing the FP source value instead of /// converting all cases and then selecting based on value. @@ -2177,8 +2203,7 @@ protected: /// Indicate that the specified operation does not work with the specified /// type and indicate what to do about it. Note that VT may refer to either /// the type of a result or that of an operand of Op. - void setOperationAction(unsigned Op, MVT VT, - LegalizeAction Action) { + void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action) { assert(Op < array_lengthof(OpActions[0]) && "Table isn't big enough!"); OpActions[(unsigned)VT.SimpleTy][Op] = Action; } @@ -2197,8 +2222,7 @@ protected: /// Indicate that the specified truncating store does not work with the /// specified type and indicate what to do about it. - void setTruncStoreAction(MVT ValVT, MVT MemVT, - LegalizeAction Action) { + void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action) { assert(ValVT.isValid() && MemVT.isValid() && "Table isn't big enough!"); TruncStoreActions[(unsigned)ValVT.SimpleTy][MemVT.SimpleTy] = Action; } @@ -2506,8 +2530,11 @@ public: return false; } - virtual bool isTruncateFree(EVT FromVT, EVT ToVT) const { - return false; + virtual bool isTruncateFree(EVT FromVT, EVT ToVT) const { return false; } + virtual bool isTruncateFree(LLT FromTy, LLT ToTy, const DataLayout &DL, + LLVMContext &Ctx) const { + return isTruncateFree(getApproximateEVTForLLT(FromTy, DL, Ctx), + getApproximateEVTForLLT(ToTy, DL, Ctx)); } virtual bool isProfitableToHoist(Instruction *I) const { return true; } @@ -2583,8 +2610,11 @@ public: return false; } - virtual bool isZExtFree(EVT FromTy, EVT ToTy) const { - return false; + virtual bool isZExtFree(EVT FromTy, EVT ToTy) const { return false; } + virtual bool isZExtFree(LLT FromTy, LLT ToTy, const DataLayout &DL, + LLVMContext &Ctx) const { + return isZExtFree(getApproximateEVTForLLT(FromTy, DL, Ctx), + getApproximateEVTForLLT(ToTy, DL, Ctx)); } /// Return true if sign-extension from FromTy to ToTy is cheaper than @@ -3807,7 +3837,7 @@ public: RetSExt = Call.hasRetAttr(Attribute::SExt); RetZExt = Call.hasRetAttr(Attribute::ZExt); NoMerge = Call.hasFnAttr(Attribute::NoMerge); - + Callee = Target; CallConv = Call.getCallingConv(); @@ -4424,33 +4454,29 @@ public: /// Expand CTPOP nodes. Expands vector/scalar CTPOP nodes, /// vector nodes can only succeed if all operations are legal/custom. /// \param N Node to expand - /// \param Result output after conversion - /// \returns True, if the expansion was successful, false otherwise - bool expandCTPOP(SDNode *N, SDValue &Result, SelectionDAG &DAG) const; + /// \returns The expansion result or SDValue() if it fails. + SDValue expandCTPOP(SDNode *N, SelectionDAG &DAG) const; /// Expand CTLZ/CTLZ_ZERO_UNDEF nodes. Expands vector/scalar CTLZ nodes, /// vector nodes can only succeed if all operations are legal/custom. 
/// \param N Node to expand - /// \param Result output after conversion - /// \returns True, if the expansion was successful, false otherwise - bool expandCTLZ(SDNode *N, SDValue &Result, SelectionDAG &DAG) const; + /// \returns The expansion result or SDValue() if it fails. + SDValue expandCTLZ(SDNode *N, SelectionDAG &DAG) const; /// Expand CTTZ/CTTZ_ZERO_UNDEF nodes. Expands vector/scalar CTTZ nodes, /// vector nodes can only succeed if all operations are legal/custom. /// \param N Node to expand - /// \param Result output after conversion - /// \returns True, if the expansion was successful, false otherwise - bool expandCTTZ(SDNode *N, SDValue &Result, SelectionDAG &DAG) const; + /// \returns The expansion result or SDValue() if it fails. + SDValue expandCTTZ(SDNode *N, SelectionDAG &DAG) const; /// Expand ABS nodes. Expands vector/scalar ABS nodes, /// vector nodes can only succeed if all operations are legal/custom. /// (ABS x) -> (XOR (ADD x, (SRA x, type_size)), (SRA x, type_size)) /// \param N Node to expand - /// \param Result output after conversion /// \param IsNegative indicate negated abs - /// \returns True, if the expansion was successful, false otherwise - bool expandABS(SDNode *N, SDValue &Result, SelectionDAG &DAG, - bool IsNegative = false) const; + /// \returns The expansion result or SDValue() if it fails. + SDValue expandABS(SDNode *N, SelectionDAG &DAG, + bool IsNegative = false) const; /// Expand BSWAP nodes. Expands scalar/vector BSWAP nodes with i16/i32/i64 /// scalar types. Returns SDValue() if expand fails. diff --git a/llvm/include/llvm/CodeGen/TargetPassConfig.h b/llvm/include/llvm/CodeGen/TargetPassConfig.h index 11138039a3c5..9b13b61fc9de 100644 --- a/llvm/include/llvm/CodeGen/TargetPassConfig.h +++ b/llvm/include/llvm/CodeGen/TargetPassConfig.h @@ -187,8 +187,7 @@ public: void substitutePass(AnalysisID StandardID, IdentifyingPassPtr TargetID); /// Insert InsertedPassID pass after TargetPassID pass. - void insertPass(AnalysisID TargetPassID, IdentifyingPassPtr InsertedPassID, - bool VerifyAfter = true); + void insertPass(AnalysisID TargetPassID, IdentifyingPassPtr InsertedPassID); /// Allow the target to enable a specific standard pass by default. void enablePass(AnalysisID PassID) { substitutePass(PassID, PassID); } @@ -323,8 +322,7 @@ public: /// Add standard passes after a pass that has just been added. For example, /// the MachineVerifier if it is enabled. - void addMachinePostPasses(const std::string &Banner, bool AllowVerify = true, - bool AllowStrip = true); + void addMachinePostPasses(const std::string &Banner); /// Check whether or not GlobalISel should abort on error. /// When this is disabled, GlobalISel will fall back on SDISel instead of @@ -449,16 +447,12 @@ protected: /// Add a CodeGen pass at this point in the pipeline after checking overrides. /// Return the pass that was added, or zero if no pass was added. - /// @p verifyAfter if true and adding a machine function pass add an extra - /// machine verification pass afterwards. - AnalysisID addPass(AnalysisID PassID, bool verifyAfter = true); + AnalysisID addPass(AnalysisID PassID); /// Add a pass to the PassManager if that pass is supposed to be run, as /// determined by the StartAfter and StopAfter options. Takes ownership of the /// pass. - /// @p verifyAfter if true and adding a machine function pass add an extra - /// machine verification pass afterwards. 
- void addPass(Pass *P, bool verifyAfter = true); + void addPass(Pass *P); /// addMachinePasses helper to create the target-selected or overriden /// regalloc pass. diff --git a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h index 92ce5b737090..8483d078ca74 100644 --- a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h +++ b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h @@ -57,6 +57,8 @@ public: /// Classes with a higher priority value are assigned first by register /// allocators using a greedy heuristic. The value is in the range [0,63]. const uint8_t AllocationPriority; + /// Configurable target specific flags. + const uint8_t TSFlags; /// Whether the class supports two (or more) disjunct subregister indices. const bool HasDisjunctSubRegs; /// Whether a combination of subregisters can cover every register in the @@ -871,10 +873,6 @@ public: /// (3) Bottom-up allocation is no longer guaranteed to optimally color. virtual bool reverseLocalAssignment() const { return false; } - /// Add the allocation priority to global and split ranges as well as the - /// local ranges when registers are added to the queue. - virtual bool addAllocPriorityToGlobalRanges() const { return false; } - /// Allow the target to override the cost of using a callee-saved register for /// the first time. Default value of 0 means we will use a callee-saved /// register if it is available. diff --git a/llvm/include/llvm/CodeGen/TargetSchedule.h b/llvm/include/llvm/CodeGen/TargetSchedule.h index aa6b82e14aa6..049ede89ab46 100644 --- a/llvm/include/llvm/CodeGen/TargetSchedule.h +++ b/llvm/include/llvm/CodeGen/TargetSchedule.h @@ -15,7 +15,6 @@ #ifndef LLVM_CODEGEN_TARGETSCHEDULE_H #define LLVM_CODEGEN_TARGETSCHEDULE_H -#include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/Config/llvm-config.h" diff --git a/llvm/include/llvm/CodeGen/ValueTypes.td b/llvm/include/llvm/CodeGen/ValueTypes.td index 0e88e705e16b..7f989e08e9bf 100644 --- a/llvm/include/llvm/CodeGen/ValueTypes.td +++ b/llvm/include/llvm/CodeGen/ValueTypes.td @@ -216,6 +216,7 @@ def untyped : ValueType<8, 174>; // Produces an untyped value def funcref : ValueType<0, 175>; // WebAssembly's funcref type def externref : ValueType<0, 176>; // WebAssembly's externref type def x86amx : ValueType<8192, 177>; // X86 AMX value +def i64x8 : ValueType<512, 178>; // 8 Consecutive GPRs (AArch64) def token : ValueType<0, 248>; // TokenTy @@ -243,7 +244,7 @@ def Any : ValueType<0, 255>; /// This class is for targets that want to use pointer types in patterns /// with the GlobalISelEmitter. Targets must define their own pointer /// derived from this class. The scalar argument should be an -/// integer type with the same bit size as the ponter. +/// integer type with the same bit size as the pointer. /// e.g. def p0 : PtrValueType ; class PtrValueType : diff --git a/llvm/include/llvm/DWARFLinker/DWARFLinker.h b/llvm/include/llvm/DWARFLinker/DWARFLinker.h index 7b89c9f66f86..1c6d0b1ead86 100644 --- a/llvm/include/llvm/DWARFLinker/DWARFLinker.h +++ b/llvm/include/llvm/DWARFLinker/DWARFLinker.h @@ -80,7 +80,7 @@ public: CompileUnit::DIEInfo &Info) = 0; /// Apply the valid relocations to the buffer \p Data, taking into - /// account that Data is at \p BaseOffset in the debug_info section. + /// account that Data is at \p BaseOffset in the .debug_info section. /// /// \returns true whether any reloc has been applied. 
virtual bool applyValidRelocs(MutableArrayRef Data, uint64_t BaseOffset, @@ -109,7 +109,7 @@ public: /// Emit section named SecName with data SecData. virtual void emitSectionContents(StringRef SecData, StringRef SecName) = 0; - /// Emit the abbreviation table \p Abbrevs to the debug_abbrev section. + /// Emit the abbreviation table \p Abbrevs to the .debug_abbrev section. virtual void emitAbbrevs(const std::vector> &Abbrevs, unsigned DwarfVersion) = 0; @@ -137,7 +137,7 @@ public: virtual void emitAppleTypes(AccelTable &Table) = 0; - /// Emit debug_ranges for \p FuncRange by translating the + /// Emit .debug_ranges for \p FuncRange by translating the /// original \p Entries. virtual void emitRangesEntries( int64_t UnitPcOffset, uint64_t OrigLowPc, @@ -145,17 +145,17 @@ public: const std::vector &Entries, unsigned AddressSize) = 0; - /// Emit debug_aranges entries for \p Unit and if \p DoRangesSection is true, - /// also emit the debug_ranges entries for the DW_TAG_compile_unit's + /// Emit .debug_aranges entries for \p Unit and if \p DoRangesSection is true, + /// also emit the .debug_ranges entries for the DW_TAG_compile_unit's /// DW_AT_ranges attribute. virtual void emitUnitRangesEntries(CompileUnit &Unit, bool DoRangesSection) = 0; - /// Copy the debug_line over to the updated binary while unobfuscating the + /// Copy the .debug_line over to the updated binary while unobfuscating the /// file names and directories. virtual void translateLineTable(DataExtractor LineData, uint64_t Offset) = 0; - /// Emit the line table described in \p Rows into the debug_line section. + /// Emit the line table described in \p Rows into the .debug_line section. virtual void emitLineTableForUnit(MCDwarfLineTableParams Params, StringRef PrologueBytes, unsigned MinInstLength, @@ -175,7 +175,7 @@ public: virtual void emitFDE(uint32_t CIEOffset, uint32_t AddreSize, uint32_t Address, StringRef Bytes) = 0; - /// Emit the debug_loc contribution for \p Unit by copying the entries from + /// Emit the .debug_loc contribution for \p Unit by copying the entries from /// \p Dwarf and offsetting them. Update the location attributes to point to /// the new entries. virtual void emitLocationsForUnit( @@ -184,7 +184,7 @@ public: ProcessExpr) = 0; /// Emit the compilation unit header for \p Unit in the - /// debug_info section. + /// .debug_info section. /// /// As a side effect, this also switches the current Dwarf version /// of the MC layer to the one of U.getOrigUnit(). @@ -695,7 +695,7 @@ private: /// Assign an abbreviation number to \p Abbrev void assignAbbrev(DIEAbbrev &Abbrev); - /// Compute and emit debug_ranges section for \p Unit, and + /// Compute and emit .debug_ranges section for \p Unit, and /// patch the attributes referencing it. void patchRangesForUnit(const CompileUnit &Unit, DWARFContext &Dwarf, const DWARFFile &File) const; @@ -706,7 +706,7 @@ private: /// Extract the line tables from the original dwarf, extract the relevant /// parts according to the linked function ranges and emit the result in the - /// debug_line section. + /// .debug_line section. void patchLineTableForUnit(CompileUnit &Unit, DWARFContext &OrigDwarf, const DWARFFile &File); @@ -753,7 +753,7 @@ private: StringMap EmittedCIEs; /// Offset of the last CIE that has been emitted in the output - /// debug_frame section. + /// .debug_frame section. uint32_t LastCIEOffset = 0; /// Apple accelerator tables. 
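Stepping back to the TargetLowering expansion helpers a few hunks above (expandCTPOP, expandCTLZ, expandCTTZ, expandABS): they now return the expanded value directly instead of a bool plus an out-parameter. A hedged sketch of the caller-side pattern, with the surrounding lowering routine assumed to exist:

#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"

// Old pattern:  SDValue Res; if (TLI.expandCTPOP(N, Res, DAG)) return Res;
// New pattern:  the helper returns the expansion, or an empty SDValue.
static llvm::SDValue lowerCTPOP(const llvm::TargetLowering &TLI,
                                llvm::SDValue Op, llvm::SelectionDAG &DAG) {
  if (llvm::SDValue Expanded = TLI.expandCTPOP(Op.getNode(), DAG))
    return Expanded;
  // Empty SDValue: fall back to whatever the generic legalizer would do.
  return llvm::SDValue();
}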
diff --git a/llvm/include/llvm/DebugInfo/CodeView/CVRecord.h b/llvm/include/llvm/DebugInfo/CodeView/CVRecord.h index 18392e3608e7..99de8ebef812 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/CVRecord.h +++ b/llvm/include/llvm/DebugInfo/CodeView/CVRecord.h @@ -10,7 +10,6 @@ #define LLVM_DEBUGINFO_CODEVIEW_CVRECORD_H #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/Optional.h" #include "llvm/DebugInfo/CodeView/CodeView.h" #include "llvm/DebugInfo/CodeView/CodeViewError.h" #include "llvm/DebugInfo/CodeView/RecordSerialization.h" diff --git a/llvm/include/llvm/DebugInfo/CodeView/CodeViewRegisters.def b/llvm/include/llvm/DebugInfo/CodeView/CodeViewRegisters.def index 48ea7e52c172..4cee3abdde87 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/CodeViewRegisters.def +++ b/llvm/include/llvm/DebugInfo/CodeView/CodeViewRegisters.def @@ -373,7 +373,7 @@ CV_REGISTER(AMD64_K7, 765) CV_REGISTER(ARM_NOREG, 0) -// General purpose 32-bit integer regisers +// General purpose 32-bit integer registers CV_REGISTER(ARM_R0, 10) CV_REGISTER(ARM_R1, 11) diff --git a/llvm/include/llvm/DebugInfo/CodeView/TypeIndex.h b/llvm/include/llvm/DebugInfo/CodeView/TypeIndex.h index bdc6cf46509b..226a436c0930 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/TypeIndex.h +++ b/llvm/include/llvm/DebugInfo/CodeView/TypeIndex.h @@ -18,6 +18,7 @@ namespace llvm { class ScopedPrinter; +class StringRef; namespace codeview { diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h index dcb26f12b13e..cdf3f60f88be 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h @@ -144,6 +144,27 @@ public: const dwarf::Attribute Attr, const DWARFUnit &U) const; + /// Compute an offset from a DIE specified by DIE offset and attribute index. + /// + /// \param AttrIndex an index of DWARF attribute. + /// \param DIEOffset the DIE offset that points to the ULEB128 abbreviation + /// code in the .debug_info data. + /// \param U the DWARFUnit the contains the DIE. + /// \returns an offset of the attribute. + uint64_t getAttributeOffsetFromIndex(uint32_t AttrIndex, uint64_t DIEOffset, + const DWARFUnit &U) const; + + /// Extract a DWARF form value from a DIE speccified by attribute index and + /// its offset. + /// + /// \param AttrIndex an index of DWARF attribute. + /// \param Offset offset of the attribute. + /// \param U the DWARFUnit the contains the DIE. + /// \returns Optional DWARF form value if the attribute was extracted. + Optional + getAttributeValueFromOffset(uint32_t AttrIndex, uint64_t Offset, + const DWARFUnit &U) const; + bool extract(DataExtractor Data, uint64_t* OffsetPtr); void dump(raw_ostream &OS) const; diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFAddressRange.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFAddressRange.h index 154f7893aa17..537a03ec11fc 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFAddressRange.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFAddressRange.h @@ -39,6 +39,8 @@ struct DWARFAddressRange { /// Returns true if [LowPC, HighPC) intersects with [RHS.LowPC, RHS.HighPC). bool intersects(const DWARFAddressRange &RHS) const { assert(valid() && RHS.valid()); + if (SectionIndex != RHS.SectionIndex) + return false; // Empty ranges can't intersect. 
if (LowPC == HighPC || RHS.LowPC == RHS.HighPC) return false; @@ -69,12 +71,12 @@ struct DWARFAddressRange { inline bool operator<(const DWARFAddressRange &LHS, const DWARFAddressRange &RHS) { - return std::tie(LHS.LowPC, LHS.HighPC) < std::tie(RHS.LowPC, RHS.HighPC); + return std::tie(LHS.SectionIndex, LHS.LowPC, LHS.HighPC) < std::tie(RHS.SectionIndex, RHS.LowPC, RHS.HighPC); } inline bool operator==(const DWARFAddressRange &LHS, const DWARFAddressRange &RHS) { - return std::tie(LHS.LowPC, LHS.HighPC) == std::tie(RHS.LowPC, RHS.HighPC); + return std::tie(LHS.SectionIndex, LHS.LowPC, LHS.HighPC) == std::tie(RHS.SectionIndex, RHS.LowPC, RHS.HighPC); } raw_ostream &operator<<(raw_ostream &OS, const DWARFAddressRange &R); diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h index 75b2280658f1..902973ff5722 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h @@ -243,6 +243,7 @@ public: } DWARFCompileUnit *getDWOCompileUnitForHash(uint64_t Hash); + DWARFTypeUnit *getTypeUnitForHash(uint16_t Version, uint64_t Hash, bool IsDWO); /// Return the compile unit that includes an offset (relative to .debug_info). DWARFCompileUnit *getCompileUnitForOffset(uint64_t Offset); @@ -373,8 +374,24 @@ public: return {2, 4, 8}; } static bool isAddressSizeSupported(unsigned AddressSize) { - return llvm::any_of(getSupportedAddressSizes(), - [=](auto Elem) { return Elem == AddressSize; }); + return llvm::is_contained(getSupportedAddressSizes(), AddressSize); + } + template + static Error checkAddressSizeSupported(unsigned AddressSize, + std::error_code EC, char const *Fmt, + const Ts &...Vals) { + if (isAddressSizeSupported(AddressSize)) + return Error::success(); + std::string Buffer; + raw_string_ostream Stream(Buffer); + Stream << format(Fmt, Vals...) + << " has unsupported address size: " << AddressSize + << " (supported are "; + ListSeparator LS; + for (unsigned Size : DWARFContext::getSupportedAddressSizes()) + Stream << LS << Size; + Stream << ')'; + return make_error(Stream.str(), EC); } std::shared_ptr getDWOContext(StringRef AbsolutePath); @@ -387,9 +404,12 @@ public: function_ref getWarningHandler() { return WarningHandler; } + enum class ProcessDebugRelocations { Process, Ignore }; + static std::unique_ptr - create(const object::ObjectFile &Obj, const LoadedObjectInfo *L = nullptr, - std::string DWPName = "", + create(const object::ObjectFile &Obj, + ProcessDebugRelocations RelocAction = ProcessDebugRelocations::Process, + const LoadedObjectInfo *L = nullptr, std::string DWPName = "", std::function RecoverableErrorHandler = WithColor::defaultErrorHandler, std::function WarningHandler = diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h index 0bfe9f376f46..c4370cb54113 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h @@ -24,9 +24,11 @@ class DWARFDebugInfoEntry { /// Offset within the .debug_info of the start of this entry. uint64_t Offset = 0; - /// The integer depth of this DIE within the compile unit DIEs where the - /// compile/type unit DIE has a depth of zero. - uint32_t Depth = 0; + /// Index of the parent die. UINT32_MAX if there is no parent. + uint32_t ParentIdx = UINT32_MAX; + + /// Index of the sibling die. Zero if there is no sibling. 
+ uint32_t SiblingIdx = 0; const DWARFAbbreviationDeclaration *AbbrevDecl = nullptr; @@ -36,15 +38,31 @@ public: /// Extracts a debug info entry, which is a child of a given unit, /// starting at a given offset. If DIE can't be extracted, returns false and /// doesn't change OffsetPtr. - bool extractFast(const DWARFUnit &U, uint64_t *OffsetPtr); - /// High performance extraction should use this call. bool extractFast(const DWARFUnit &U, uint64_t *OffsetPtr, const DWARFDataExtractor &DebugInfoData, uint64_t UEndOffset, - uint32_t Depth); + uint32_t ParentIdx); uint64_t getOffset() const { return Offset; } - uint32_t getDepth() const { return Depth; } + + /// Returns index of the parent die. + Optional getParentIdx() const { + if (ParentIdx == UINT32_MAX) + return None; + + return ParentIdx; + } + + /// Returns index of the sibling die. + Optional getSiblingIdx() const { + if (SiblingIdx == 0) + return None; + + return SiblingIdx; + } + + /// Set index of sibling. + void setSiblingIdx(uint32_t Idx) { SiblingIdx = Idx; } dwarf::Tag getTag() const { return AbbrevDecl ? AbbrevDecl->getTag() : dwarf::DW_TAG_null; diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h index d1d65372740b..ee15b6d4112d 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h @@ -110,10 +110,6 @@ public: /// Length of the prologue in bytes. uint64_t getLength() const; - int32_t getMaxLineIncrementForSpecialOpcode() const { - return LineBase + (int8_t)LineRange - 1; - } - /// Get DWARF-version aware access to the file name entry at the provided /// index. const llvm::DWARFDebugLine::FileNameEntry & diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h index 2f72c642a2d5..0d9f37c5610b 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h @@ -49,12 +49,7 @@ public: /// 2. An address, which defines the appropriate base address for /// use in interpreting the beginning and ending address offsets of /// subsequent entries of the location list. - bool isBaseAddressSelectionEntry(uint8_t AddressSize) const { - assert(AddressSize == 4 || AddressSize == 8); - if (AddressSize == 4) - return StartAddress == -1U; - return StartAddress == -1ULL; - } + bool isBaseAddressSelectionEntry(uint8_t AddressSize) const; }; private: diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h index 1903bab5e73f..8f93ebc4ebc0 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h @@ -182,6 +182,8 @@ public: DWARFDie getAttributeValueAsReferencedDie(dwarf::Attribute Attr) const; DWARFDie getAttributeValueAsReferencedDie(const DWARFFormValue &V) const; + DWARFDie resolveTypeUnitReference() const; + /// Extract the range base attribute from this DIE as absolute section offset. /// /// This is a utility function that checks for either the DW_AT_rnglists_base @@ -220,16 +222,6 @@ public: /// information is available. Expected getAddressRanges() const; - /// Get all address ranges for any DW_TAG_subprogram DIEs in this DIE or any - /// of its children. - /// - /// Get the hi/low PC range if both attributes are available or exrtracts the - /// non-contiguous address ranges from the DW_AT_ranges attribute for this DIE - /// and all children. 
- /// - /// \param Ranges the addres range vector to fill in. - void collectChildrenAddressRanges(DWARFAddressRangesVector &Ranges) const; - bool addressRangeContainsAddress(const uint64_t Address) const; Expected @@ -246,6 +238,8 @@ public: /// for ShortName if LinkageName is not found. /// Returns null if no name is found. const char *getName(DINameKind Kind) const; + void getFullName(raw_string_ostream &, + std::string *OriginalFullName = nullptr) const; /// Return the DIE short name resolving DW_AT_specification or /// DW_AT_abstract_origin references if necessary. Returns null if no name diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFExpression.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFExpression.h index 794e859bfe72..b694eeacfd9d 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFExpression.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFExpression.h @@ -86,24 +86,30 @@ public: uint64_t OperandEndOffsets[2]; public: - Description &getDescription() { return Desc; } - uint8_t getCode() { return Opcode; } - uint64_t getRawOperand(unsigned Idx) { return Operands[Idx]; } - uint64_t getOperandEndOffset(unsigned Idx) { return OperandEndOffsets[Idx]; } - uint64_t getEndOffset() { return EndOffset; } - bool extract(DataExtractor Data, uint8_t AddressSize, uint64_t Offset, - Optional Format); - bool isError() { return Error; } + const Description &getDescription() const { return Desc; } + uint8_t getCode() const { return Opcode; } + uint64_t getRawOperand(unsigned Idx) const { return Operands[Idx]; } + uint64_t getOperandEndOffset(unsigned Idx) const { + return OperandEndOffsets[Idx]; + } + uint64_t getEndOffset() const { return EndOffset; } + bool isError() const { return Error; } bool print(raw_ostream &OS, DIDumpOptions DumpOpts, const DWARFExpression *Expr, const MCRegisterInfo *RegInfo, - DWARFUnit *U, bool isEH); - bool verify(DWARFUnit *U); + DWARFUnit *U, bool isEH) const; + + /// Verify \p Op. Does not affect the return of \a isError(). + static bool verify(const Operation &Op, DWARFUnit *U); + + private: + bool extract(DataExtractor Data, uint8_t AddressSize, uint64_t Offset, + Optional Format); }; /// An iterator to go through the expression operations. class iterator : public iterator_facade_base { + const Operation> { friend class DWARFExpression; const DWARFExpression *Expr; uint64_t Offset; @@ -116,19 +122,17 @@ public: } public: - class Operation &operator++() { + iterator &operator++() { Offset = Op.isError() ? 
Expr->Data.getData().size() : Op.EndOffset; Op.Error = Offset >= Expr->Data.getData().size() || !Op.extract(Expr->Data, Expr->AddressSize, Offset, Expr->Format); - return Op; + return *this; } - class Operation &operator*() { - return Op; - } + const Operation &operator*() const { return Op; } - iterator skipBytes(uint64_t Add) { + iterator skipBytes(uint64_t Add) const { return iterator(Expr, Op.EndOffset + Add); } @@ -159,6 +163,8 @@ public: bool operator==(const DWARFExpression &RHS) const; + StringRef getData() const { return Data.getData(); } + private: DataExtractor Data; uint8_t AddressSize; diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h index 43be024f0d37..3c051c3ea018 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h @@ -102,10 +102,6 @@ public: return extractValue(Data, OffsetPtr, FormParams, nullptr, U); } - bool isInlinedCStr() const { - return Value.data != nullptr && Value.data == (const uint8_t *)Value.cstr; - } - /// getAsFoo functions below return the extracted value as Foo if only /// DWARFFormValue has form class is suitable for representing Foo. Optional getAsReference() const; @@ -123,6 +119,19 @@ public: Optional> getAsBlock() const; Optional getAsCStringOffset() const; Optional getAsReferenceUVal() const; + /// Correctly extract any file paths from a form value. + /// + /// These attributes can be in the from DW_AT_decl_file or DW_AT_call_file + /// attributes. We need to use the file index in the correct DWARFUnit's line + /// table prologue, and each DWARFFormValue has the DWARFUnit the form value + /// was extracted from. + /// + /// \param Kind The kind of path to extract. + /// + /// \returns A valid string value on success, or llvm::None if the form class + /// is not FC_Constant, or if the file index is not valid. + Optional + getAsFile(DILineInfoSpecifier::FileLineInfoKind Kind) const; /// Skip a form's value in \p DebugInfoData at the offset specified by /// \p OffsetPtr. diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h index 93d7e2b563fd..d471b80c7fe1 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h @@ -49,8 +49,6 @@ public: DieRangeInfo(std::vector Ranges) : Ranges(std::move(Ranges)) {} - typedef std::vector::const_iterator - address_range_iterator; typedef std::set::const_iterator die_range_info_iterator; /// Inserts the address range. If the range overlaps with an existing @@ -62,16 +60,6 @@ public: /// children address ranges must all be contained in. Optional insert(const DWARFAddressRange &R); - /// Finds an address range in the sorted vector of ranges. - address_range_iterator findRange(const DWARFAddressRange &R) const { - auto Begin = Ranges.begin(); - auto End = Ranges.end(); - auto Iter = std::upper_bound(Begin, End, R); - if (Iter != Begin) - --Iter; - return Iter; - } - /// Inserts the address range info. If any of its ranges overlaps with a /// range in an existing range info, the range info is *not* added and an /// iterator to the overlapping range info. @@ -91,14 +79,11 @@ private: raw_ostream &OS; DWARFContext &DCtx; DIDumpOptions DumpOpts; - /// A map that tracks all references (converted absolute references) so we - /// can verify each reference points to a valid DIE and not an offset that - /// lies between to valid DIEs. 
- std::map> ReferenceToDIEOffsets; uint32_t NumDebugLineErrors = 0; // Used to relax some checks that do not currently work portably bool IsObjectFile; bool IsMachOObject; + using ReferenceMap = std::map>; raw_ostream &error() const; raw_ostream &warn() const; @@ -140,6 +125,7 @@ private: bool verifyUnitHeader(const DWARFDataExtractor DebugInfoData, uint64_t *Offset, unsigned UnitIndex, uint8_t &UnitType, bool &isUnitDWARF64); + bool verifyName(const DWARFDie &Die); /// Verifies the header of a unit in a .debug_info or .debug_types section. /// @@ -156,7 +142,9 @@ private: /// \param Unit The DWARF Unit to verify. /// /// \returns The number of errors that occurred during verification. - unsigned verifyUnitContents(DWARFUnit &Unit); + unsigned verifyUnitContents(DWARFUnit &Unit, + ReferenceMap &UnitLocalReferences, + ReferenceMap &CrossUnitReferences); /// Verifies the unit headers and contents in a .debug_info or .debug_types /// section. @@ -208,7 +196,9 @@ private: /// /// \returns NumErrors The number of errors occurred during verification of /// attributes' forms in a unit - unsigned verifyDebugInfoForm(const DWARFDie &Die, DWARFAttribute &AttrValue); + unsigned verifyDebugInfoForm(const DWARFDie &Die, DWARFAttribute &AttrValue, + ReferenceMap &UnitLocalReferences, + ReferenceMap &CrossUnitReferences); /// Verifies the all valid references that were found when iterating through /// all of the DIE attributes. @@ -220,7 +210,9 @@ private: /// /// \returns NumErrors The number of errors occurred during verification of /// references for the .debug_info and .debug_types sections - unsigned verifyDebugInfoReferences(); + unsigned verifyDebugInfoReferences( + const ReferenceMap &, + llvm::function_ref GetUnitForDieOffset); /// Verify the DW_AT_stmt_list encoding and value and ensure that no /// compile units that have the same DW_AT_stmt_list value. 
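The DWARFContext hunk earlier in this patch adds the ProcessDebugRelocations enum and threads it through create() ahead of the LoadedObjectInfo pointer, defaulting to Process. A minimal caller sketch, assuming LLVM's public headers and an already-loaded object::ObjectFile; the makeContextWithoutRelocs wrapper is hypothetical and not part of this patch:

#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/Object/ObjectFile.h"
#include <memory>

// Build a DWARFContext that skips relocation processing; existing callers
// that pass only the object file keep the old behavior via the Process
// default shown in the signature above.
std::unique_ptr<llvm::DWARFContext>
makeContextWithoutRelocs(const llvm::object::ObjectFile &Obj) {
  return llvm::DWARFContext::create(
      Obj, llvm::DWARFContext::ProcessDebugRelocations::Ignore);
}
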
diff --git a/llvm/include/llvm/DebugInfo/GSYM/StringTable.h b/llvm/include/llvm/DebugInfo/GSYM/StringTable.h index f7f800d01647..045c9e3f3ebd 100644 --- a/llvm/include/llvm/DebugInfo/GSYM/StringTable.h +++ b/llvm/include/llvm/DebugInfo/GSYM/StringTable.h @@ -9,7 +9,6 @@ #ifndef LLVM_DEBUGINFO_GSYM_STRINGTABLE_H #define LLVM_DEBUGINFO_GSYM_STRINGTABLE_H -#include "llvm/ADT/Optional.h" #include "llvm/ADT/StringRef.h" #include "llvm/DebugInfo/GSYM/Range.h" #include diff --git a/llvm/include/llvm/DebugInfo/MSF/MSFCommon.h b/llvm/include/llvm/DebugInfo/MSF/MSFCommon.h index 83331b14b8af..a922839a999d 100644 --- a/llvm/include/llvm/DebugInfo/MSF/MSFCommon.h +++ b/llvm/include/llvm/DebugInfo/MSF/MSFCommon.h @@ -93,6 +93,9 @@ inline bool isValidBlockSize(uint32_t Size) { case 1024: case 2048: case 4096: + case 8192: + case 16384: + case 32768: return true; } return false; diff --git a/llvm/include/llvm/DebugInfo/MSF/MappedBlockStream.h b/llvm/include/llvm/DebugInfo/MSF/MappedBlockStream.h index 473c89e8106f..296a4840b779 100644 --- a/llvm/include/llvm/DebugInfo/MSF/MappedBlockStream.h +++ b/llvm/include/llvm/DebugInfo/MSF/MappedBlockStream.h @@ -58,12 +58,12 @@ public: return support::little; } - Error readBytes(uint32_t Offset, uint32_t Size, + Error readBytes(uint64_t Offset, uint64_t Size, ArrayRef &Buffer) override; - Error readLongestContiguousChunk(uint32_t Offset, + Error readLongestContiguousChunk(uint64_t Offset, ArrayRef &Buffer) override; - uint32_t getLength() override; + uint64_t getLength() override; BumpPtrAllocator &getAllocator() { return Allocator; } @@ -79,10 +79,10 @@ protected: private: const MSFStreamLayout &getStreamLayout() const { return StreamLayout; } - void fixCacheAfterWrite(uint32_t Offset, ArrayRef Data) const; + void fixCacheAfterWrite(uint64_t Offset, ArrayRef Data) const; - Error readBytes(uint32_t Offset, MutableArrayRef Buffer); - bool tryReadContiguously(uint32_t Offset, uint32_t Size, + Error readBytes(uint64_t Offset, MutableArrayRef Buffer); + bool tryReadContiguously(uint64_t Offset, uint64_t Size, ArrayRef &Buffer); const uint32_t BlockSize; @@ -125,13 +125,13 @@ public: return support::little; } - Error readBytes(uint32_t Offset, uint32_t Size, + Error readBytes(uint64_t Offset, uint64_t Size, ArrayRef &Buffer) override; - Error readLongestContiguousChunk(uint32_t Offset, + Error readLongestContiguousChunk(uint64_t Offset, ArrayRef &Buffer) override; - uint32_t getLength() override; + uint64_t getLength() override; - Error writeBytes(uint32_t Offset, ArrayRef Buffer) override; + Error writeBytes(uint64_t Offset, ArrayRef Buffer) override; Error commit() override; diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleList.h b/llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleList.h index 5fb13ad30e91..de5b46f21672 100644 --- a/llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleList.h +++ b/llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleList.h @@ -31,9 +31,7 @@ struct FileInfoSubstreamHeader; class DbiModuleSourceFilesIterator : public iterator_facade_base { - using BaseType = - iterator_facade_base; + using BaseType = typename DbiModuleSourceFilesIterator::iterator_facade_base; public: DbiModuleSourceFilesIterator(const DbiModuleList &Modules, uint32_t Modi, diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/HashTable.h b/llvm/include/llvm/DebugInfo/PDB/Native/HashTable.h index 95c0a89551ed..474bd796b2b3 100644 --- a/llvm/include/llvm/DebugInfo/PDB/Native/HashTable.h +++ b/llvm/include/llvm/DebugInfo/PDB/Native/HashTable.h @@ -38,6 +38,7 @@ class 
HashTableIterator : public iterator_facade_base, std::forward_iterator_tag, const std::pair> { + using BaseT = typename HashTableIterator::iterator_facade_base; friend HashTable; HashTableIterator(const HashTable &Map, uint32_t Index, @@ -76,9 +77,7 @@ public: // Implement postfix op++ in terms of prefix op++ by using the superclass // implementation. - using iterator_facade_base, - std::forward_iterator_tag, - const std::pair>::operator++; + using BaseT::operator++; HashTableIterator &operator++() { while (Index < Map->Buckets.size()) { ++Index; diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/NamedStreamMap.h b/llvm/include/llvm/DebugInfo/PDB/Native/NamedStreamMap.h index 1df059ffa9fd..f110e90b3f90 100644 --- a/llvm/include/llvm/DebugInfo/PDB/Native/NamedStreamMap.h +++ b/llvm/include/llvm/DebugInfo/PDB/Native/NamedStreamMap.h @@ -9,7 +9,6 @@ #ifndef LLVM_DEBUGINFO_PDB_NATIVE_NAMEDSTREAMMAP_H #define LLVM_DEBUGINFO_PDB_NATIVE_NAMEDSTREAMMAP_H -#include "llvm/ADT/Optional.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/iterator_range.h" diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/NativeLineNumber.h b/llvm/include/llvm/DebugInfo/PDB/Native/NativeLineNumber.h index 5dedc70f11ba..be0ddf0a063a 100644 --- a/llvm/include/llvm/DebugInfo/PDB/Native/NativeLineNumber.h +++ b/llvm/include/llvm/DebugInfo/PDB/Native/NativeLineNumber.h @@ -9,7 +9,6 @@ #ifndef LLVM_DEBUGINFO_PDB_NATIVE_NATIVELINENUMBER_H #define LLVM_DEBUGINFO_PDB_NATIVE_NATIVELINENUMBER_H -#include "llvm/ADT/Optional.h" #include "llvm/DebugInfo/CodeView/Line.h" #include "llvm/DebugInfo/PDB/IPDBLineNumber.h" #include "llvm/DebugInfo/PDB/Native/NativeSession.h" diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeFunctionSig.h b/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeFunctionSig.h index 8f1834d0a2c2..90b5d8068959 100644 --- a/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeFunctionSig.h +++ b/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeFunctionSig.h @@ -9,7 +9,6 @@ #ifndef LLVM_DEBUGINFO_PDB_NATIVE_NATIVETYPEFUNCTIONSIG_H #define LLVM_DEBUGINFO_PDB_NATIVE_NATIVETYPEFUNCTIONSIG_H -#include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" #include "llvm/DebugInfo/CodeView/CodeView.h" #include "llvm/DebugInfo/CodeView/TypeRecord.h" diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeVTShape.h b/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeVTShape.h index 4ae8f1471781..21995ca665c1 100644 --- a/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeVTShape.h +++ b/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeVTShape.h @@ -9,7 +9,6 @@ #ifndef LLVM_DEBUGINFO_PDB_NATIVE_NATIVETYPEVTSHAPE_H #define LLVM_DEBUGINFO_PDB_NATIVE_NATIVETYPEVTSHAPE_H -#include "llvm/ADT/Optional.h" #include "llvm/DebugInfo/CodeView/CodeView.h" #include "llvm/DebugInfo/CodeView/TypeRecord.h" #include "llvm/DebugInfo/PDB/Native/NativeRawSymbol.h" diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h b/llvm/include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h index 3c414e7a9005..004d005280d4 100644 --- a/llvm/include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h +++ b/llvm/include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h @@ -10,7 +10,6 @@ #define LLVM_DEBUGINFO_PDB_NATIVE_PDBFILEBUILDER_H #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" #include "llvm/DebugInfo/PDB/Native/NamedStreamMap.h" #include "llvm/DebugInfo/PDB/Native/PDBFile.h" diff --git a/llvm/include/llvm/Demangle/Demangle.h 
b/llvm/include/llvm/Demangle/Demangle.h index c396a1dc5dd3..3150e049320b 100644 --- a/llvm/include/llvm/Demangle/Demangle.h +++ b/llvm/include/llvm/Demangle/Demangle.h @@ -31,7 +31,6 @@ enum : int { char *itaniumDemangle(const char *mangled_name, char *buf, size_t *n, int *status); - enum MSDemangleFlags { MSDF_None = 0, MSDF_DumpBackrefs = 1 << 0, @@ -39,6 +38,7 @@ enum MSDemangleFlags { MSDF_NoCallingConvention = 1 << 2, MSDF_NoReturnType = 1 << 3, MSDF_NoMemberType = 1 << 4, + MSDF_NoVariableType = 1 << 5, }; /// Demangles the Microsoft symbol pointed at by mangled_name and returns it. @@ -53,13 +53,16 @@ enum MSDemangleFlags { /// receives the size of the demangled string on output if n_buf is not nullptr. /// status receives one of the demangle_ enum entries above if it's not nullptr. /// Flags controls various details of the demangled representation. -char *microsoftDemangle(const char *mangled_name, size_t *n_read, - char *buf, size_t *n_buf, - int *status, MSDemangleFlags Flags = MSDF_None); +char *microsoftDemangle(const char *mangled_name, size_t *n_read, char *buf, + size_t *n_buf, int *status, + MSDemangleFlags Flags = MSDF_None); // Demangles a Rust v0 mangled symbol. The API follows that of __cxa_demangle. char *rustDemangle(const char *MangledName, char *Buf, size_t *N, int *Status); +// Demangles a D mangled symbol. +char *dlangDemangle(const char *MangledName); + /// Attempt to demangle a string using different demangling schemes. /// The function uses heuristics to determine which demangling scheme to use. /// \param MangledName - reference to string to demangle. @@ -67,6 +70,8 @@ char *rustDemangle(const char *MangledName, char *Buf, size_t *N, int *Status); /// demangling occurred. std::string demangle(const std::string &MangledName); +bool nonMicrosoftDemangle(const char *MangledName, std::string &Result); + /// "Partial" demangler. This supports demangling a string into an AST /// (typically an intermediate stage in itaniumDemangle) and querying certain /// properties or partially printing the demangled name. 
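Alongside the reflowed microsoftDemangle signature, the hunk above adds dlangDemangle and a nonMicrosoftDemangle entry point that reports success through a bool and writes the result into a std::string. A minimal usage sketch, assuming only the declarations above in the llvm namespace; the mangled name is just an illustration:

#include "llvm/Demangle/Demangle.h"
#include <iostream>
#include <string>

int main() {
  const char *Mangled = "_Z3foov"; // Itanium encoding of foo()
  std::string Result;
  // Tries the non-Microsoft schemes declared above without heuristics.
  if (llvm::nonMicrosoftDemangle(Mangled, Result))
    std::cout << Result << '\n';
  else
    std::cout << llvm::demangle(Mangled) << '\n'; // heuristic fallback
  return 0;
}
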
@@ -118,6 +123,7 @@ struct ItaniumPartialDemangler { bool isSpecialName() const; ~ItaniumPartialDemangler(); + private: void *RootNode; void *Context; diff --git a/llvm/include/llvm/Demangle/ItaniumDemangle.h b/llvm/include/llvm/Demangle/ItaniumDemangle.h index 9163b713d118..86f5c992b63d 100644 --- a/llvm/include/llvm/Demangle/ItaniumDemangle.h +++ b/llvm/include/llvm/Demangle/ItaniumDemangle.h @@ -57,6 +57,7 @@ X(LocalName) \ X(VectorType) \ X(PixelVectorType) \ + X(BinaryFPType) \ X(SyntheticTemplateParamName) \ X(TypeTemplateParamDecl) \ X(NonTypeTemplateParamDecl) \ @@ -109,6 +110,126 @@ DEMANGLE_NAMESPACE_BEGIN +template class PODSmallVector { + static_assert(std::is_pod::value, + "T is required to be a plain old data type"); + + T *First = nullptr; + T *Last = nullptr; + T *Cap = nullptr; + T Inline[N] = {0}; + + bool isInline() const { return First == Inline; } + + void clearInline() { + First = Inline; + Last = Inline; + Cap = Inline + N; + } + + void reserve(size_t NewCap) { + size_t S = size(); + if (isInline()) { + auto *Tmp = static_cast(std::malloc(NewCap * sizeof(T))); + if (Tmp == nullptr) + std::terminate(); + std::copy(First, Last, Tmp); + First = Tmp; + } else { + First = static_cast(std::realloc(First, NewCap * sizeof(T))); + if (First == nullptr) + std::terminate(); + } + Last = First + S; + Cap = First + NewCap; + } + +public: + PODSmallVector() : First(Inline), Last(First), Cap(Inline + N) {} + + PODSmallVector(const PODSmallVector &) = delete; + PODSmallVector &operator=(const PODSmallVector &) = delete; + + PODSmallVector(PODSmallVector &&Other) : PODSmallVector() { + if (Other.isInline()) { + std::copy(Other.begin(), Other.end(), First); + Last = First + Other.size(); + Other.clear(); + return; + } + + First = Other.First; + Last = Other.Last; + Cap = Other.Cap; + Other.clearInline(); + } + + PODSmallVector &operator=(PODSmallVector &&Other) { + if (Other.isInline()) { + if (!isInline()) { + std::free(First); + clearInline(); + } + std::copy(Other.begin(), Other.end(), First); + Last = First + Other.size(); + Other.clear(); + return *this; + } + + if (isInline()) { + First = Other.First; + Last = Other.Last; + Cap = Other.Cap; + Other.clearInline(); + return *this; + } + + std::swap(First, Other.First); + std::swap(Last, Other.Last); + std::swap(Cap, Other.Cap); + Other.clear(); + return *this; + } + + // NOLINTNEXTLINE(readability-identifier-naming) + void push_back(const T &Elem) { + if (Last == Cap) + reserve(size() * 2); + *Last++ = Elem; + } + + // NOLINTNEXTLINE(readability-identifier-naming) + void pop_back() { + assert(Last != First && "Popping empty vector!"); + --Last; + } + + void dropBack(size_t Index) { + assert(Index <= size() && "dropBack() can't expand!"); + Last = First + Index; + } + + T *begin() { return First; } + T *end() { return Last; } + + bool empty() const { return First == Last; } + size_t size() const { return static_cast(Last - First); } + T &back() { + assert(Last != First && "Calling back() on empty vector!"); + return *(Last - 1); + } + T &operator[](size_t Index) { + assert(Index < size() && "Invalid access!"); + return *(begin() + Index); + } + void clear() { Last = First; } + + ~PODSmallVector() { + if (!isInline()) + std::free(First); + } +}; + // Base class of all AST nodes. The AST is built by the parser, then is // traversed by the printLeft/Right functions to produce a demangled string. class Node { @@ -155,50 +276,48 @@ public: // would construct an equivalent node. 
//template void match(Fn F) const; - bool hasRHSComponent(OutputStream &S) const { + bool hasRHSComponent(OutputBuffer &OB) const { if (RHSComponentCache != Cache::Unknown) return RHSComponentCache == Cache::Yes; - return hasRHSComponentSlow(S); + return hasRHSComponentSlow(OB); } - bool hasArray(OutputStream &S) const { + bool hasArray(OutputBuffer &OB) const { if (ArrayCache != Cache::Unknown) return ArrayCache == Cache::Yes; - return hasArraySlow(S); + return hasArraySlow(OB); } - bool hasFunction(OutputStream &S) const { + bool hasFunction(OutputBuffer &OB) const { if (FunctionCache != Cache::Unknown) return FunctionCache == Cache::Yes; - return hasFunctionSlow(S); + return hasFunctionSlow(OB); } Kind getKind() const { return K; } - virtual bool hasRHSComponentSlow(OutputStream &) const { return false; } - virtual bool hasArraySlow(OutputStream &) const { return false; } - virtual bool hasFunctionSlow(OutputStream &) const { return false; } + virtual bool hasRHSComponentSlow(OutputBuffer &) const { return false; } + virtual bool hasArraySlow(OutputBuffer &) const { return false; } + virtual bool hasFunctionSlow(OutputBuffer &) const { return false; } // Dig through "glue" nodes like ParameterPack and ForwardTemplateReference to // get at a node that actually represents some concrete syntax. - virtual const Node *getSyntaxNode(OutputStream &) const { - return this; - } + virtual const Node *getSyntaxNode(OutputBuffer &) const { return this; } - void print(OutputStream &S) const { - printLeft(S); + void print(OutputBuffer &OB) const { + printLeft(OB); if (RHSComponentCache != Cache::No) - printRight(S); + printRight(OB); } - // Print the "left" side of this Node into OutputStream. - virtual void printLeft(OutputStream &) const = 0; + // Print the "left" side of this Node into OutputBuffer. + virtual void printLeft(OutputBuffer &) const = 0; // Print the "right". This distinction is necessary to represent C++ types // that appear on the RHS of their subtype, such as arrays or functions. // Since most types don't have such a component, provide a default // implementation. - virtual void printRight(OutputStream &) const {} + virtual void printRight(OutputBuffer &) const {} virtual StringView getBaseName() const { return StringView(); } @@ -227,19 +346,19 @@ public: Node *operator[](size_t Idx) const { return Elements[Idx]; } - void printWithComma(OutputStream &S) const { + void printWithComma(OutputBuffer &OB) const { bool FirstElement = true; for (size_t Idx = 0; Idx != NumElements; ++Idx) { - size_t BeforeComma = S.getCurrentPosition(); + size_t BeforeComma = OB.getCurrentPosition(); if (!FirstElement) - S += ", "; - size_t AfterComma = S.getCurrentPosition(); - Elements[Idx]->print(S); + OB += ", "; + size_t AfterComma = OB.getCurrentPosition(); + Elements[Idx]->print(OB); // Elements[Idx] is an empty parameter pack expansion, we should erase the // comma we just printed. 
- if (AfterComma == S.getCurrentPosition()) { - S.setCurrentPosition(BeforeComma); + if (AfterComma == OB.getCurrentPosition()) { + OB.setCurrentPosition(BeforeComma); continue; } @@ -254,9 +373,7 @@ struct NodeArrayNode : Node { template void match(Fn F) const { F(Array); } - void printLeft(OutputStream &S) const override { - Array.printWithComma(S); - } + void printLeft(OutputBuffer &OB) const override { Array.printWithComma(OB); } }; class DotSuffix final : public Node { @@ -269,11 +386,11 @@ public: template void match(Fn F) const { F(Prefix, Suffix); } - void printLeft(OutputStream &s) const override { - Prefix->print(s); - s += " ("; - s += Suffix; - s += ")"; + void printLeft(OutputBuffer &OB) const override { + Prefix->print(OB); + OB += " ("; + OB += Suffix; + OB += ")"; } }; @@ -288,12 +405,12 @@ public: template void match(Fn F) const { F(Ty, Ext, TA); } - void printLeft(OutputStream &S) const override { - Ty->print(S); - S += " "; - S += Ext; + void printLeft(OutputBuffer &OB) const override { + Ty->print(OB); + OB += " "; + OB += Ext; if (TA != nullptr) - TA->print(S); + TA->print(OB); } }; @@ -319,13 +436,13 @@ protected: const Qualifiers Quals; const Node *Child; - void printQuals(OutputStream &S) const { + void printQuals(OutputBuffer &OB) const { if (Quals & QualConst) - S += " const"; + OB += " const"; if (Quals & QualVolatile) - S += " volatile"; + OB += " volatile"; if (Quals & QualRestrict) - S += " restrict"; + OB += " restrict"; } public: @@ -336,22 +453,22 @@ public: template void match(Fn F) const { F(Child, Quals); } - bool hasRHSComponentSlow(OutputStream &S) const override { - return Child->hasRHSComponent(S); + bool hasRHSComponentSlow(OutputBuffer &OB) const override { + return Child->hasRHSComponent(OB); } - bool hasArraySlow(OutputStream &S) const override { - return Child->hasArray(S); + bool hasArraySlow(OutputBuffer &OB) const override { + return Child->hasArray(OB); } - bool hasFunctionSlow(OutputStream &S) const override { - return Child->hasFunction(S); + bool hasFunctionSlow(OutputBuffer &OB) const override { + return Child->hasFunction(OB); } - void printLeft(OutputStream &S) const override { - Child->printLeft(S); - printQuals(S); + void printLeft(OutputBuffer &OB) const override { + Child->printLeft(OB); + printQuals(OB); } - void printRight(OutputStream &S) const override { Child->printRight(S); } + void printRight(OutputBuffer &OB) const override { Child->printRight(OB); } }; class ConversionOperatorType final : public Node { @@ -363,9 +480,9 @@ public: template void match(Fn F) const { F(Ty); } - void printLeft(OutputStream &S) const override { - S += "operator "; - Ty->print(S); + void printLeft(OutputBuffer &OB) const override { + OB += "operator "; + Ty->print(OB); } }; @@ -379,9 +496,9 @@ public: template void match(Fn F) const { F(Ty, Postfix); } - void printLeft(OutputStream &s) const override { - Ty->printLeft(s); - s += Postfix; + void printLeft(OutputBuffer &OB) const override { + Ty->printLeft(OB); + OB += Postfix; } }; @@ -396,7 +513,7 @@ public: StringView getName() const { return Name; } StringView getBaseName() const override { return Name; } - void printLeft(OutputStream &s) const override { s += Name; } + void printLeft(OutputBuffer &OB) const override { OB += Name; } }; class ElaboratedTypeSpefType : public Node { @@ -408,10 +525,10 @@ public: template void match(Fn F) const { F(Kind, Child); } - void printLeft(OutputStream &S) const override { - S += Kind; - S += ' '; - Child->print(S); + void printLeft(OutputBuffer &OB) const 
override { + OB += Kind; + OB += ' '; + Child->print(OB); } }; @@ -426,11 +543,11 @@ struct AbiTagAttr : Node { template void match(Fn F) const { F(Base, Tag); } - void printLeft(OutputStream &S) const override { - Base->printLeft(S); - S += "[abi:"; - S += Tag; - S += "]"; + void printLeft(OutputBuffer &OB) const override { + Base->printLeft(OB); + OB += "[abi:"; + OB += Tag; + OB += "]"; } }; @@ -442,10 +559,10 @@ public: template void match(Fn F) const { F(Conditions); } - void printLeft(OutputStream &S) const override { - S += " [enable_if:"; - Conditions.printWithComma(S); - S += ']'; + void printLeft(OutputBuffer &OB) const override { + OB += " [enable_if:"; + Conditions.printWithComma(OB); + OB += ']'; } }; @@ -466,11 +583,11 @@ public: static_cast(Ty)->getName() == "objc_object"; } - void printLeft(OutputStream &S) const override { - Ty->print(S); - S += "<"; - S += Protocol; - S += ">"; + void printLeft(OutputBuffer &OB) const override { + Ty->print(OB); + OB += "<"; + OB += Protocol; + OB += ">"; } }; @@ -484,34 +601,34 @@ public: template void match(Fn F) const { F(Pointee); } - bool hasRHSComponentSlow(OutputStream &S) const override { - return Pointee->hasRHSComponent(S); + bool hasRHSComponentSlow(OutputBuffer &OB) const override { + return Pointee->hasRHSComponent(OB); } - void printLeft(OutputStream &s) const override { + void printLeft(OutputBuffer &OB) const override { // We rewrite objc_object* into id. if (Pointee->getKind() != KObjCProtoName || !static_cast(Pointee)->isObjCObject()) { - Pointee->printLeft(s); - if (Pointee->hasArray(s)) - s += " "; - if (Pointee->hasArray(s) || Pointee->hasFunction(s)) - s += "("; - s += "*"; + Pointee->printLeft(OB); + if (Pointee->hasArray(OB)) + OB += " "; + if (Pointee->hasArray(OB) || Pointee->hasFunction(OB)) + OB += "("; + OB += "*"; } else { const auto *objcProto = static_cast(Pointee); - s += "id<"; - s += objcProto->Protocol; - s += ">"; + OB += "id<"; + OB += objcProto->Protocol; + OB += ">"; } } - void printRight(OutputStream &s) const override { + void printRight(OutputBuffer &OB) const override { if (Pointee->getKind() != KObjCProtoName || !static_cast(Pointee)->isObjCObject()) { - if (Pointee->hasArray(s) || Pointee->hasFunction(s)) - s += ")"; - Pointee->printRight(s); + if (Pointee->hasArray(OB) || Pointee->hasFunction(OB)) + OB += ")"; + Pointee->printRight(OB); } } }; @@ -531,15 +648,30 @@ class ReferenceType : public Node { // Dig through any refs to refs, collapsing the ReferenceTypes as we go. The // rule here is rvalue ref to rvalue ref collapses to a rvalue ref, and any // other combination collapses to a lvalue ref. - std::pair collapse(OutputStream &S) const { + // + // A combination of a TemplateForwardReference and a back-ref Substitution + // from an ill-formed string may have created a cycle; use cycle detection to + // avoid looping forever. 
+ std::pair collapse(OutputBuffer &OB) const { auto SoFar = std::make_pair(RK, Pointee); + // Track the chain of nodes for the Floyd's 'tortoise and hare' + // cycle-detection algorithm, since getSyntaxNode(S) is impure + PODSmallVector Prev; for (;;) { - const Node *SN = SoFar.second->getSyntaxNode(S); + const Node *SN = SoFar.second->getSyntaxNode(OB); if (SN->getKind() != KReferenceType) break; auto *RT = static_cast(SN); SoFar.second = RT->Pointee; SoFar.first = std::min(SoFar.first, RT->RK); + + // The middle of Prev is the 'slow' pointer moving at half speed + Prev.push_back(SoFar.second); + if (Prev.size() > 1 && SoFar.second == Prev[(Prev.size() - 1) / 2]) { + // Cycle detected + SoFar.second = nullptr; + break; + } } return SoFar; } @@ -551,31 +683,35 @@ public: template void match(Fn F) const { F(Pointee, RK); } - bool hasRHSComponentSlow(OutputStream &S) const override { - return Pointee->hasRHSComponent(S); + bool hasRHSComponentSlow(OutputBuffer &OB) const override { + return Pointee->hasRHSComponent(OB); } - void printLeft(OutputStream &s) const override { + void printLeft(OutputBuffer &OB) const override { if (Printing) return; SwapAndRestore SavePrinting(Printing, true); - std::pair Collapsed = collapse(s); - Collapsed.second->printLeft(s); - if (Collapsed.second->hasArray(s)) - s += " "; - if (Collapsed.second->hasArray(s) || Collapsed.second->hasFunction(s)) - s += "("; + std::pair Collapsed = collapse(OB); + if (!Collapsed.second) + return; + Collapsed.second->printLeft(OB); + if (Collapsed.second->hasArray(OB)) + OB += " "; + if (Collapsed.second->hasArray(OB) || Collapsed.second->hasFunction(OB)) + OB += "("; - s += (Collapsed.first == ReferenceKind::LValue ? "&" : "&&"); + OB += (Collapsed.first == ReferenceKind::LValue ? "&" : "&&"); } - void printRight(OutputStream &s) const override { + void printRight(OutputBuffer &OB) const override { if (Printing) return; SwapAndRestore SavePrinting(Printing, true); - std::pair Collapsed = collapse(s); - if (Collapsed.second->hasArray(s) || Collapsed.second->hasFunction(s)) - s += ")"; - Collapsed.second->printRight(s); + std::pair Collapsed = collapse(OB); + if (!Collapsed.second) + return; + if (Collapsed.second->hasArray(OB) || Collapsed.second->hasFunction(OB)) + OB += ")"; + Collapsed.second->printRight(OB); } }; @@ -590,24 +726,24 @@ public: template void match(Fn F) const { F(ClassType, MemberType); } - bool hasRHSComponentSlow(OutputStream &S) const override { - return MemberType->hasRHSComponent(S); + bool hasRHSComponentSlow(OutputBuffer &OB) const override { + return MemberType->hasRHSComponent(OB); } - void printLeft(OutputStream &s) const override { - MemberType->printLeft(s); - if (MemberType->hasArray(s) || MemberType->hasFunction(s)) - s += "("; + void printLeft(OutputBuffer &OB) const override { + MemberType->printLeft(OB); + if (MemberType->hasArray(OB) || MemberType->hasFunction(OB)) + OB += "("; else - s += " "; - ClassType->print(s); - s += "::*"; + OB += " "; + ClassType->print(OB); + OB += "::*"; } - void printRight(OutputStream &s) const override { - if (MemberType->hasArray(s) || MemberType->hasFunction(s)) - s += ")"; - MemberType->printRight(s); + void printRight(OutputBuffer &OB) const override { + if (MemberType->hasArray(OB) || MemberType->hasFunction(OB)) + OB += ")"; + MemberType->printRight(OB); } }; @@ -624,19 +760,19 @@ public: template void match(Fn F) const { F(Base, Dimension); } - bool hasRHSComponentSlow(OutputStream &) const override { return true; } - bool hasArraySlow(OutputStream &) 
const override { return true; } + bool hasRHSComponentSlow(OutputBuffer &) const override { return true; } + bool hasArraySlow(OutputBuffer &) const override { return true; } - void printLeft(OutputStream &S) const override { Base->printLeft(S); } + void printLeft(OutputBuffer &OB) const override { Base->printLeft(OB); } - void printRight(OutputStream &S) const override { - if (S.back() != ']') - S += " "; - S += "["; + void printRight(OutputBuffer &OB) const override { + if (OB.back() != ']') + OB += " "; + OB += "["; if (Dimension) - Dimension->print(S); - S += "]"; - Base->printRight(S); + Dimension->print(OB); + OB += "]"; + Base->printRight(OB); } }; @@ -660,8 +796,8 @@ public: F(Ret, Params, CVQuals, RefQual, ExceptionSpec); } - bool hasRHSComponentSlow(OutputStream &) const override { return true; } - bool hasFunctionSlow(OutputStream &) const override { return true; } + bool hasRHSComponentSlow(OutputBuffer &) const override { return true; } + bool hasFunctionSlow(OutputBuffer &) const override { return true; } // Handle C++'s ... quirky decl grammar by using the left & right // distinction. Consider: @@ -670,32 +806,32 @@ public: // that takes a char and returns an int. If we're trying to print f, start // by printing out the return types's left, then print our parameters, then // finally print right of the return type. - void printLeft(OutputStream &S) const override { - Ret->printLeft(S); - S += " "; + void printLeft(OutputBuffer &OB) const override { + Ret->printLeft(OB); + OB += " "; } - void printRight(OutputStream &S) const override { - S += "("; - Params.printWithComma(S); - S += ")"; - Ret->printRight(S); + void printRight(OutputBuffer &OB) const override { + OB += "("; + Params.printWithComma(OB); + OB += ")"; + Ret->printRight(OB); if (CVQuals & QualConst) - S += " const"; + OB += " const"; if (CVQuals & QualVolatile) - S += " volatile"; + OB += " volatile"; if (CVQuals & QualRestrict) - S += " restrict"; + OB += " restrict"; if (RefQual == FrefQualLValue) - S += " &"; + OB += " &"; else if (RefQual == FrefQualRValue) - S += " &&"; + OB += " &&"; if (ExceptionSpec != nullptr) { - S += ' '; - ExceptionSpec->print(S); + OB += ' '; + ExceptionSpec->print(OB); } } }; @@ -707,10 +843,10 @@ public: template void match(Fn F) const { F(E); } - void printLeft(OutputStream &S) const override { - S += "noexcept("; - E->print(S); - S += ")"; + void printLeft(OutputBuffer &OB) const override { + OB += "noexcept("; + E->print(OB); + OB += ")"; } }; @@ -722,10 +858,10 @@ public: template void match(Fn F) const { F(Types); } - void printLeft(OutputStream &S) const override { - S += "throw("; - Types.printWithComma(S); - S += ')'; + void printLeft(OutputBuffer &OB) const override { + OB += "throw("; + Types.printWithComma(OB); + OB += ')'; } }; @@ -756,41 +892,41 @@ public: NodeArray getParams() const { return Params; } const Node *getReturnType() const { return Ret; } - bool hasRHSComponentSlow(OutputStream &) const override { return true; } - bool hasFunctionSlow(OutputStream &) const override { return true; } + bool hasRHSComponentSlow(OutputBuffer &) const override { return true; } + bool hasFunctionSlow(OutputBuffer &) const override { return true; } const Node *getName() const { return Name; } - void printLeft(OutputStream &S) const override { + void printLeft(OutputBuffer &OB) const override { if (Ret) { - Ret->printLeft(S); - if (!Ret->hasRHSComponent(S)) - S += " "; + Ret->printLeft(OB); + if (!Ret->hasRHSComponent(OB)) + OB += " "; } - Name->print(S); + Name->print(OB); } - 
void printRight(OutputStream &S) const override { - S += "("; - Params.printWithComma(S); - S += ")"; + void printRight(OutputBuffer &OB) const override { + OB += "("; + Params.printWithComma(OB); + OB += ")"; if (Ret) - Ret->printRight(S); + Ret->printRight(OB); if (CVQuals & QualConst) - S += " const"; + OB += " const"; if (CVQuals & QualVolatile) - S += " volatile"; + OB += " volatile"; if (CVQuals & QualRestrict) - S += " restrict"; + OB += " restrict"; if (RefQual == FrefQualLValue) - S += " &"; + OB += " &"; else if (RefQual == FrefQualRValue) - S += " &&"; + OB += " &&"; if (Attrs != nullptr) - Attrs->print(S); + Attrs->print(OB); } }; @@ -803,9 +939,9 @@ public: template void match(Fn F) const { F(OpName); } - void printLeft(OutputStream &S) const override { - S += "operator\"\" "; - OpName->print(S); + void printLeft(OutputBuffer &OB) const override { + OB += "operator\"\" "; + OpName->print(OB); } }; @@ -819,9 +955,9 @@ public: template void match(Fn F) const { F(Special, Child); } - void printLeft(OutputStream &S) const override { - S += Special; - Child->print(S); + void printLeft(OutputBuffer &OB) const override { + OB += Special; + Child->print(OB); } }; @@ -836,11 +972,11 @@ public: template void match(Fn F) const { F(FirstType, SecondType); } - void printLeft(OutputStream &S) const override { - S += "construction vtable for "; - FirstType->print(S); - S += "-in-"; - SecondType->print(S); + void printLeft(OutputBuffer &OB) const override { + OB += "construction vtable for "; + FirstType->print(OB); + OB += "-in-"; + SecondType->print(OB); } }; @@ -855,10 +991,10 @@ struct NestedName : Node { StringView getBaseName() const override { return Name->getBaseName(); } - void printLeft(OutputStream &S) const override { - Qual->print(S); - S += "::"; - Name->print(S); + void printLeft(OutputBuffer &OB) const override { + Qual->print(OB); + OB += "::"; + Name->print(OB); } }; @@ -871,10 +1007,10 @@ struct LocalName : Node { template void match(Fn F) const { F(Encoding, Entity); } - void printLeft(OutputStream &S) const override { - Encoding->print(S); - S += "::"; - Entity->print(S); + void printLeft(OutputBuffer &OB) const override { + Encoding->print(OB); + OB += "::"; + Entity->print(OB); } }; @@ -891,10 +1027,10 @@ public: StringView getBaseName() const override { return Name->getBaseName(); } - void printLeft(OutputStream &S) const override { - Qualifier->print(S); - S += "::"; - Name->print(S); + void printLeft(OutputBuffer &OB) const override { + Qualifier->print(OB); + OB += "::"; + Name->print(OB); } }; @@ -909,12 +1045,12 @@ public: template void match(Fn F) const { F(BaseType, Dimension); } - void printLeft(OutputStream &S) const override { - BaseType->print(S); - S += " vector["; + void printLeft(OutputBuffer &OB) const override { + BaseType->print(OB); + OB += " vector["; if (Dimension) - Dimension->print(S); - S += "]"; + Dimension->print(OB); + OB += "]"; } }; @@ -927,11 +1063,26 @@ public: template void match(Fn F) const { F(Dimension); } - void printLeft(OutputStream &S) const override { + void printLeft(OutputBuffer &OB) const override { // FIXME: This should demangle as "vector pixel". 
- S += "pixel vector["; - Dimension->print(S); - S += "]"; + OB += "pixel vector["; + Dimension->print(OB); + OB += "]"; + } +}; + +class BinaryFPType final : public Node { + const Node *Dimension; + +public: + BinaryFPType(const Node *Dimension_) + : Node(KBinaryFPType), Dimension(Dimension_) {} + + template void match(Fn F) const { F(Dimension); } + + void printLeft(OutputBuffer &OB) const override { + OB += "_Float"; + Dimension->print(OB); } }; @@ -953,20 +1104,20 @@ public: template void match(Fn F) const { F(Kind, Index); } - void printLeft(OutputStream &S) const override { + void printLeft(OutputBuffer &OB) const override { switch (Kind) { case TemplateParamKind::Type: - S += "$T"; + OB += "$T"; break; case TemplateParamKind::NonType: - S += "$N"; + OB += "$N"; break; case TemplateParamKind::Template: - S += "$TT"; + OB += "$TT"; break; } if (Index > 0) - S << Index - 1; + OB << Index - 1; } }; @@ -980,13 +1131,9 @@ public: template void match(Fn F) const { F(Name); } - void printLeft(OutputStream &S) const override { - S += "typename "; - } + void printLeft(OutputBuffer &OB) const override { OB += "typename "; } - void printRight(OutputStream &S) const override { - Name->print(S); - } + void printRight(OutputBuffer &OB) const override { Name->print(OB); } }; /// A non-type template parameter declaration, 'int N'. @@ -1000,15 +1147,15 @@ public: template void match(Fn F) const { F(Name, Type); } - void printLeft(OutputStream &S) const override { - Type->printLeft(S); - if (!Type->hasRHSComponent(S)) - S += " "; + void printLeft(OutputBuffer &OB) const override { + Type->printLeft(OB); + if (!Type->hasRHSComponent(OB)) + OB += " "; } - void printRight(OutputStream &S) const override { - Name->print(S); - Type->printRight(S); + void printRight(OutputBuffer &OB) const override { + Name->print(OB); + Type->printRight(OB); } }; @@ -1025,15 +1172,13 @@ public: template void match(Fn F) const { F(Name, Params); } - void printLeft(OutputStream &S) const override { - S += "template<"; - Params.printWithComma(S); - S += "> typename "; + void printLeft(OutputBuffer &OB) const override { + OB += "template<"; + Params.printWithComma(OB); + OB += "> typename "; } - void printRight(OutputStream &S) const override { - Name->print(S); - } + void printRight(OutputBuffer &OB) const override { Name->print(OB); } }; /// A template parameter pack declaration, 'typename ...T'. @@ -1046,14 +1191,12 @@ public: template void match(Fn F) const { F(Param); } - void printLeft(OutputStream &S) const override { - Param->printLeft(S); - S += "..."; + void printLeft(OutputBuffer &OB) const override { + Param->printLeft(OB); + OB += "..."; } - void printRight(OutputStream &S) const override { - Param->printRight(S); - } + void printRight(OutputBuffer &OB) const override { Param->printRight(OB); } }; /// An unexpanded parameter pack (either in the expression or type context). If @@ -1067,11 +1210,11 @@ public: class ParameterPack final : public Node { NodeArray Data; - // Setup OutputStream for a pack expansion unless we're already expanding one. - void initializePackExpansion(OutputStream &S) const { - if (S.CurrentPackMax == std::numeric_limits::max()) { - S.CurrentPackMax = static_cast(Data.size()); - S.CurrentPackIndex = 0; + // Setup OutputBuffer for a pack expansion unless we're already expanding one. 
+ void initializePackExpansion(OutputBuffer &OB) const { + if (OB.CurrentPackMax == std::numeric_limits::max()) { + OB.CurrentPackMax = static_cast(Data.size()); + OB.CurrentPackIndex = 0; } } @@ -1094,38 +1237,38 @@ public: template void match(Fn F) const { F(Data); } - bool hasRHSComponentSlow(OutputStream &S) const override { - initializePackExpansion(S); - size_t Idx = S.CurrentPackIndex; - return Idx < Data.size() && Data[Idx]->hasRHSComponent(S); + bool hasRHSComponentSlow(OutputBuffer &OB) const override { + initializePackExpansion(OB); + size_t Idx = OB.CurrentPackIndex; + return Idx < Data.size() && Data[Idx]->hasRHSComponent(OB); } - bool hasArraySlow(OutputStream &S) const override { - initializePackExpansion(S); - size_t Idx = S.CurrentPackIndex; - return Idx < Data.size() && Data[Idx]->hasArray(S); + bool hasArraySlow(OutputBuffer &OB) const override { + initializePackExpansion(OB); + size_t Idx = OB.CurrentPackIndex; + return Idx < Data.size() && Data[Idx]->hasArray(OB); } - bool hasFunctionSlow(OutputStream &S) const override { - initializePackExpansion(S); - size_t Idx = S.CurrentPackIndex; - return Idx < Data.size() && Data[Idx]->hasFunction(S); + bool hasFunctionSlow(OutputBuffer &OB) const override { + initializePackExpansion(OB); + size_t Idx = OB.CurrentPackIndex; + return Idx < Data.size() && Data[Idx]->hasFunction(OB); } - const Node *getSyntaxNode(OutputStream &S) const override { - initializePackExpansion(S); - size_t Idx = S.CurrentPackIndex; - return Idx < Data.size() ? Data[Idx]->getSyntaxNode(S) : this; + const Node *getSyntaxNode(OutputBuffer &OB) const override { + initializePackExpansion(OB); + size_t Idx = OB.CurrentPackIndex; + return Idx < Data.size() ? Data[Idx]->getSyntaxNode(OB) : this; } - void printLeft(OutputStream &S) const override { - initializePackExpansion(S); - size_t Idx = S.CurrentPackIndex; + void printLeft(OutputBuffer &OB) const override { + initializePackExpansion(OB); + size_t Idx = OB.CurrentPackIndex; if (Idx < Data.size()) - Data[Idx]->printLeft(S); + Data[Idx]->printLeft(OB); } - void printRight(OutputStream &S) const override { - initializePackExpansion(S); - size_t Idx = S.CurrentPackIndex; + void printRight(OutputBuffer &OB) const override { + initializePackExpansion(OB); + size_t Idx = OB.CurrentPackIndex; if (Idx < Data.size()) - Data[Idx]->printRight(S); + Data[Idx]->printRight(OB); } }; @@ -1144,8 +1287,8 @@ public: NodeArray getElements() const { return Elements; } - void printLeft(OutputStream &S) const override { - Elements.printWithComma(S); + void printLeft(OutputBuffer &OB) const override { + Elements.printWithComma(OB); } }; @@ -1162,35 +1305,35 @@ public: const Node *getChild() const { return Child; } - void printLeft(OutputStream &S) const override { + void printLeft(OutputBuffer &OB) const override { constexpr unsigned Max = std::numeric_limits::max(); - SwapAndRestore SavePackIdx(S.CurrentPackIndex, Max); - SwapAndRestore SavePackMax(S.CurrentPackMax, Max); - size_t StreamPos = S.getCurrentPosition(); + SwapAndRestore SavePackIdx(OB.CurrentPackIndex, Max); + SwapAndRestore SavePackMax(OB.CurrentPackMax, Max); + size_t StreamPos = OB.getCurrentPosition(); // Print the first element in the pack. If Child contains a ParameterPack, // it will set up S.CurrentPackMax and print the first element. - Child->print(S); + Child->print(OB); // No ParameterPack was found in Child. This can occur if we've found a pack // expansion on a . 
- if (S.CurrentPackMax == Max) { - S += "..."; + if (OB.CurrentPackMax == Max) { + OB += "..."; return; } // We found a ParameterPack, but it has no elements. Erase whatever we may // of printed. - if (S.CurrentPackMax == 0) { - S.setCurrentPosition(StreamPos); + if (OB.CurrentPackMax == 0) { + OB.setCurrentPosition(StreamPos); return; } // Else, iterate through the rest of the elements in the pack. - for (unsigned I = 1, E = S.CurrentPackMax; I < E; ++I) { - S += ", "; - S.CurrentPackIndex = I; - Child->print(S); + for (unsigned I = 1, E = OB.CurrentPackMax; I < E; ++I) { + OB += ", "; + OB.CurrentPackIndex = I; + Child->print(OB); } } }; @@ -1205,12 +1348,12 @@ public: NodeArray getParams() { return Params; } - void printLeft(OutputStream &S) const override { - S += "<"; - Params.printWithComma(S); - if (S.back() == '>') - S += " "; - S += ">"; + void printLeft(OutputBuffer &OB) const override { + OB += "<"; + Params.printWithComma(OB); + if (OB.back() == '>') + OB += " "; + OB += ">"; } }; @@ -1252,42 +1395,42 @@ struct ForwardTemplateReference : Node { // special handling. template void match(Fn F) const = delete; - bool hasRHSComponentSlow(OutputStream &S) const override { + bool hasRHSComponentSlow(OutputBuffer &OB) const override { if (Printing) return false; SwapAndRestore SavePrinting(Printing, true); - return Ref->hasRHSComponent(S); + return Ref->hasRHSComponent(OB); } - bool hasArraySlow(OutputStream &S) const override { + bool hasArraySlow(OutputBuffer &OB) const override { if (Printing) return false; SwapAndRestore SavePrinting(Printing, true); - return Ref->hasArray(S); + return Ref->hasArray(OB); } - bool hasFunctionSlow(OutputStream &S) const override { + bool hasFunctionSlow(OutputBuffer &OB) const override { if (Printing) return false; SwapAndRestore SavePrinting(Printing, true); - return Ref->hasFunction(S); + return Ref->hasFunction(OB); } - const Node *getSyntaxNode(OutputStream &S) const override { + const Node *getSyntaxNode(OutputBuffer &OB) const override { if (Printing) return this; SwapAndRestore SavePrinting(Printing, true); - return Ref->getSyntaxNode(S); + return Ref->getSyntaxNode(OB); } - void printLeft(OutputStream &S) const override { + void printLeft(OutputBuffer &OB) const override { if (Printing) return; SwapAndRestore SavePrinting(Printing, true); - Ref->printLeft(S); + Ref->printLeft(OB); } - void printRight(OutputStream &S) const override { + void printRight(OutputBuffer &OB) const override { if (Printing) return; SwapAndRestore SavePrinting(Printing, true); - Ref->printRight(S); + Ref->printRight(OB); } }; @@ -1303,9 +1446,9 @@ struct NameWithTemplateArgs : Node { StringView getBaseName() const override { return Name->getBaseName(); } - void printLeft(OutputStream &S) const override { - Name->print(S); - TemplateArgs->print(S); + void printLeft(OutputBuffer &OB) const override { + Name->print(OB); + TemplateArgs->print(OB); } }; @@ -1320,9 +1463,9 @@ public: StringView getBaseName() const override { return Child->getBaseName(); } - void printLeft(OutputStream &S) const override { - S += "::"; - Child->print(S); + void printLeft(OutputBuffer &OB) const override { + OB += "::"; + Child->print(OB); } }; @@ -1335,9 +1478,9 @@ struct StdQualifiedName : Node { StringView getBaseName() const override { return Child->getBaseName(); } - void printLeft(OutputStream &S) const override { - S += "std::"; - Child->print(S); + void printLeft(OutputBuffer &OB) const override { + OB += "std::"; + Child->print(OB); } }; @@ -1377,26 +1520,26 @@ public: 
DEMANGLE_UNREACHABLE; } - void printLeft(OutputStream &S) const override { + void printLeft(OutputBuffer &OB) const override { switch (SSK) { case SpecialSubKind::allocator: - S += "std::allocator"; + OB += "std::allocator"; break; case SpecialSubKind::basic_string: - S += "std::basic_string"; + OB += "std::basic_string"; break; case SpecialSubKind::string: - S += "std::basic_string, " - "std::allocator >"; + OB += "std::basic_string, " + "std::allocator >"; break; case SpecialSubKind::istream: - S += "std::basic_istream >"; + OB += "std::basic_istream >"; break; case SpecialSubKind::ostream: - S += "std::basic_ostream >"; + OB += "std::basic_ostream >"; break; case SpecialSubKind::iostream: - S += "std::basic_iostream >"; + OB += "std::basic_iostream >"; break; } } @@ -1429,25 +1572,25 @@ public: DEMANGLE_UNREACHABLE; } - void printLeft(OutputStream &S) const override { + void printLeft(OutputBuffer &OB) const override { switch (SSK) { case SpecialSubKind::allocator: - S += "std::allocator"; + OB += "std::allocator"; break; case SpecialSubKind::basic_string: - S += "std::basic_string"; + OB += "std::basic_string"; break; case SpecialSubKind::string: - S += "std::string"; + OB += "std::string"; break; case SpecialSubKind::istream: - S += "std::istream"; + OB += "std::istream"; break; case SpecialSubKind::ostream: - S += "std::ostream"; + OB += "std::ostream"; break; case SpecialSubKind::iostream: - S += "std::iostream"; + OB += "std::iostream"; break; } } @@ -1465,10 +1608,10 @@ public: template void match(Fn F) const { F(Basename, IsDtor, Variant); } - void printLeft(OutputStream &S) const override { + void printLeft(OutputBuffer &OB) const override { if (IsDtor) - S += "~"; - S += Basename->getBaseName(); + OB += "~"; + OB += Basename->getBaseName(); } }; @@ -1480,9 +1623,9 @@ public: template void match(Fn F) const { F(Base); } - void printLeft(OutputStream &S) const override { - S += "~"; - Base->printLeft(S); + void printLeft(OutputBuffer &OB) const override { + OB += "~"; + Base->printLeft(OB); } }; @@ -1494,10 +1637,10 @@ public: template void match(Fn F) const { F(Count); } - void printLeft(OutputStream &S) const override { - S += "'unnamed"; - S += Count; - S += "\'"; + void printLeft(OutputBuffer &OB) const override { + OB += "'unnamed"; + OB += Count; + OB += "\'"; } }; @@ -1516,22 +1659,22 @@ public: F(TemplateParams, Params, Count); } - void printDeclarator(OutputStream &S) const { + void printDeclarator(OutputBuffer &OB) const { if (!TemplateParams.empty()) { - S += "<"; - TemplateParams.printWithComma(S); - S += ">"; + OB += "<"; + TemplateParams.printWithComma(OB); + OB += ">"; } - S += "("; - Params.printWithComma(S); - S += ")"; + OB += "("; + Params.printWithComma(OB); + OB += ")"; } - void printLeft(OutputStream &S) const override { - S += "\'lambda"; - S += Count; - S += "\'"; - printDeclarator(S); + void printLeft(OutputBuffer &OB) const override { + OB += "\'lambda"; + OB += Count; + OB += "\'"; + printDeclarator(OB); } }; @@ -1543,10 +1686,10 @@ public: template void match(Fn F) const { F(Bindings); } - void printLeft(OutputStream &S) const override { - S += '['; - Bindings.printWithComma(S); - S += ']'; + void printLeft(OutputBuffer &OB) const override { + OB += '['; + Bindings.printWithComma(OB); + OB += ']'; } }; @@ -1564,22 +1707,22 @@ public: template void match(Fn F) const { F(LHS, InfixOperator, RHS); } - void printLeft(OutputStream &S) const override { + void printLeft(OutputBuffer &OB) const override { // might be a template argument expression, then we 
need to disambiguate // with parens. if (InfixOperator == ">") - S += "("; + OB += "("; - S += "("; - LHS->print(S); - S += ") "; - S += InfixOperator; - S += " ("; - RHS->print(S); - S += ")"; + OB += "("; + LHS->print(OB); + OB += ") "; + OB += InfixOperator; + OB += " ("; + RHS->print(OB); + OB += ")"; if (InfixOperator == ">") - S += ")"; + OB += ")"; } }; @@ -1593,12 +1736,12 @@ public: template void match(Fn F) const { F(Op1, Op2); } - void printLeft(OutputStream &S) const override { - S += "("; - Op1->print(S); - S += ")["; - Op2->print(S); - S += "]"; + void printLeft(OutputBuffer &OB) const override { + OB += "("; + Op1->print(OB); + OB += ")["; + Op2->print(OB); + OB += "]"; } }; @@ -1612,11 +1755,11 @@ public: template void match(Fn F) const { F(Child, Operator); } - void printLeft(OutputStream &S) const override { - S += "("; - Child->print(S); - S += ")"; - S += Operator; + void printLeft(OutputBuffer &OB) const override { + OB += "("; + Child->print(OB); + OB += ")"; + OB += Operator; } }; @@ -1631,14 +1774,14 @@ public: template void match(Fn F) const { F(Cond, Then, Else); } - void printLeft(OutputStream &S) const override { - S += "("; - Cond->print(S); - S += ") ? ("; - Then->print(S); - S += ") : ("; - Else->print(S); - S += ")"; + void printLeft(OutputBuffer &OB) const override { + OB += "("; + Cond->print(OB); + OB += ") ? ("; + Then->print(OB); + OB += ") : ("; + Else->print(OB); + OB += ")"; } }; @@ -1653,10 +1796,10 @@ public: template void match(Fn F) const { F(LHS, Kind, RHS); } - void printLeft(OutputStream &S) const override { - LHS->print(S); - S += Kind; - RHS->print(S); + void printLeft(OutputBuffer &OB) const override { + LHS->print(OB); + OB += Kind; + RHS->print(OB); } }; @@ -1677,20 +1820,20 @@ public: F(Type, SubExpr, Offset, UnionSelectors, OnePastTheEnd); } - void printLeft(OutputStream &S) const override { - SubExpr->print(S); - S += ".<"; - Type->print(S); - S += " at offset "; + void printLeft(OutputBuffer &OB) const override { + SubExpr->print(OB); + OB += ".<"; + Type->print(OB); + OB += " at offset "; if (Offset.empty()) { - S += "0"; + OB += "0"; } else if (Offset[0] == 'n') { - S += "-"; - S += Offset.dropFront(); + OB += "-"; + OB += Offset.dropFront(); } else { - S += Offset; + OB += Offset; } - S += ">"; + OB += ">"; } }; @@ -1706,10 +1849,10 @@ public: template void match(Fn F) const { F(Prefix, Infix, Postfix); } - void printLeft(OutputStream &S) const override { - S += Prefix; - Infix->print(S); - S += Postfix; + void printLeft(OutputBuffer &OB) const override { + OB += Prefix; + Infix->print(OB); + OB += Postfix; } }; @@ -1725,13 +1868,13 @@ public: template void match(Fn F) const { F(CastKind, To, From); } - void printLeft(OutputStream &S) const override { - S += CastKind; - S += "<"; - To->printLeft(S); - S += ">("; - From->printLeft(S); - S += ")"; + void printLeft(OutputBuffer &OB) const override { + OB += CastKind; + OB += "<"; + To->printLeft(OB); + OB += ">("; + From->printLeft(OB); + OB += ")"; } }; @@ -1744,11 +1887,11 @@ public: template void match(Fn F) const { F(Pack); } - void printLeft(OutputStream &S) const override { - S += "sizeof...("; + void printLeft(OutputBuffer &OB) const override { + OB += "sizeof...("; ParameterPackExpansion PPE(Pack); - PPE.printLeft(S); - S += ")"; + PPE.printLeft(OB); + OB += ")"; } }; @@ -1762,11 +1905,11 @@ public: template void match(Fn F) const { F(Callee, Args); } - void printLeft(OutputStream &S) const override { - Callee->print(S); - S += "("; - Args.printWithComma(S); - S += ")"; + void 
printLeft(OutputBuffer &OB) const override { + Callee->print(OB); + OB += "("; + Args.printWithComma(OB); + OB += ")"; } }; @@ -1787,25 +1930,24 @@ public: F(ExprList, Type, InitList, IsGlobal, IsArray); } - void printLeft(OutputStream &S) const override { + void printLeft(OutputBuffer &OB) const override { if (IsGlobal) - S += "::operator "; - S += "new"; + OB += "::operator "; + OB += "new"; if (IsArray) - S += "[]"; - S += ' '; + OB += "[]"; + OB += ' '; if (!ExprList.empty()) { - S += "("; - ExprList.printWithComma(S); - S += ")"; + OB += "("; + ExprList.printWithComma(OB); + OB += ")"; } - Type->print(S); + Type->print(OB); if (!InitList.empty()) { - S += "("; - InitList.printWithComma(S); - S += ")"; + OB += "("; + InitList.printWithComma(OB); + OB += ")"; } - } }; @@ -1820,13 +1962,13 @@ public: template void match(Fn F) const { F(Op, IsGlobal, IsArray); } - void printLeft(OutputStream &S) const override { + void printLeft(OutputBuffer &OB) const override { if (IsGlobal) - S += "::"; - S += "delete"; + OB += "::"; + OB += "delete"; if (IsArray) - S += "[] "; - Op->print(S); + OB += "[] "; + Op->print(OB); } }; @@ -1840,11 +1982,11 @@ public: template void match(Fn F) const { F(Prefix, Child); } - void printLeft(OutputStream &S) const override { - S += Prefix; - S += "("; - Child->print(S); - S += ")"; + void printLeft(OutputBuffer &OB) const override { + OB += Prefix; + OB += "("; + Child->print(OB); + OB += ")"; } }; @@ -1856,9 +1998,9 @@ public: template void match(Fn F) const { F(Number); } - void printLeft(OutputStream &S) const override { - S += "fp"; - S += Number; + void printLeft(OutputBuffer &OB) const override { + OB += "fp"; + OB += Number; } }; @@ -1872,12 +2014,12 @@ public: template void match(Fn F) const { F(Type, Expressions); } - void printLeft(OutputStream &S) const override { - S += "("; - Type->print(S); - S += ")("; - Expressions.printWithComma(S); - S += ")"; + void printLeft(OutputBuffer &OB) const override { + OB += "("; + Type->print(OB); + OB += ")("; + Expressions.printWithComma(OB); + OB += ")"; } }; @@ -1894,12 +2036,12 @@ public: template void match(Fn F) const { F(Type, SubExpr, Offset); } - void printLeft(OutputStream &S) const override { - S += "("; - Type->print(S); - S += ")("; - SubExpr->print(S); - S += ")"; + void printLeft(OutputBuffer &OB) const override { + OB += "("; + Type->print(OB); + OB += ")("; + SubExpr->print(OB); + OB += ")"; } }; @@ -1912,12 +2054,12 @@ public: template void match(Fn F) const { F(Ty, Inits); } - void printLeft(OutputStream &S) const override { + void printLeft(OutputBuffer &OB) const override { if (Ty) - Ty->print(S); - S += '{'; - Inits.printWithComma(S); - S += '}'; + Ty->print(OB); + OB += '{'; + Inits.printWithComma(OB); + OB += '}'; } }; @@ -1931,18 +2073,18 @@ public: template void match(Fn F) const { F(Elem, Init, IsArray); } - void printLeft(OutputStream &S) const override { + void printLeft(OutputBuffer &OB) const override { if (IsArray) { - S += '['; - Elem->print(S); - S += ']'; + OB += '['; + Elem->print(OB); + OB += ']'; } else { - S += '.'; - Elem->print(S); + OB += '.'; + Elem->print(OB); } if (Init->getKind() != KBracedExpr && Init->getKind() != KBracedRangeExpr) - S += " = "; - Init->print(S); + OB += " = "; + Init->print(OB); } }; @@ -1956,15 +2098,15 @@ public: template void match(Fn F) const { F(First, Last, Init); } - void printLeft(OutputStream &S) const override { - S += '['; - First->print(S); - S += " ... 
"; - Last->print(S); - S += ']'; + void printLeft(OutputBuffer &OB) const override { + OB += '['; + First->print(OB); + OB += " ... "; + Last->print(OB); + OB += ']'; if (Init->getKind() != KBracedExpr && Init->getKind() != KBracedRangeExpr) - S += " = "; - Init->print(S); + OB += " = "; + Init->print(OB); } }; @@ -1983,43 +2125,43 @@ public: F(IsLeftFold, OperatorName, Pack, Init); } - void printLeft(OutputStream &S) const override { + void printLeft(OutputBuffer &OB) const override { auto PrintPack = [&] { - S += '('; - ParameterPackExpansion(Pack).print(S); - S += ')'; + OB += '('; + ParameterPackExpansion(Pack).print(OB); + OB += ')'; }; - S += '('; + OB += '('; if (IsLeftFold) { // init op ... op pack if (Init != nullptr) { - Init->print(S); - S += ' '; - S += OperatorName; - S += ' '; + Init->print(OB); + OB += ' '; + OB += OperatorName; + OB += ' '; } // ... op pack - S += "... "; - S += OperatorName; - S += ' '; + OB += "... "; + OB += OperatorName; + OB += ' '; PrintPack(); } else { // !IsLeftFold // pack op ... PrintPack(); - S += ' '; - S += OperatorName; - S += " ..."; + OB += ' '; + OB += OperatorName; + OB += " ..."; // pack op ... op init if (Init != nullptr) { - S += ' '; - S += OperatorName; - S += ' '; - Init->print(S); + OB += ' '; + OB += OperatorName; + OB += ' '; + Init->print(OB); } } - S += ')'; + OB += ')'; } }; @@ -2031,9 +2173,9 @@ public: template void match(Fn F) const { F(Op); } - void printLeft(OutputStream &S) const override { - S += "throw "; - Op->print(S); + void printLeft(OutputBuffer &OB) const override { + OB += "throw "; + Op->print(OB); } }; @@ -2045,8 +2187,8 @@ public: template void match(Fn F) const { F(Value); } - void printLeft(OutputStream &S) const override { - S += Value ? StringView("true") : StringView("false"); + void printLeft(OutputBuffer &OB) const override { + OB += Value ? 
StringView("true") : StringView("false"); } }; @@ -2058,10 +2200,10 @@ public: template void match(Fn F) const { F(Type); } - void printLeft(OutputStream &S) const override { - S += "\"<"; - Type->print(S); - S += ">\""; + void printLeft(OutputBuffer &OB) const override { + OB += "\"<"; + Type->print(OB); + OB += ">\""; } }; @@ -2073,11 +2215,11 @@ public: template void match(Fn F) const { F(Type); } - void printLeft(OutputStream &S) const override { - S += "[]"; + void printLeft(OutputBuffer &OB) const override { + OB += "[]"; if (Type->getKind() == KClosureTypeName) - static_cast(Type)->printDeclarator(S); - S += "{...}"; + static_cast(Type)->printDeclarator(OB); + OB += "{...}"; } }; @@ -2092,15 +2234,15 @@ public: template void match(Fn F) const { F(Ty, Integer); } - void printLeft(OutputStream &S) const override { - S << "("; - Ty->print(S); - S << ")"; + void printLeft(OutputBuffer &OB) const override { + OB << "("; + Ty->print(OB); + OB << ")"; if (Integer[0] == 'n') - S << "-" << Integer.dropFront(1); + OB << "-" << Integer.dropFront(1); else - S << Integer; + OB << Integer; } }; @@ -2114,21 +2256,21 @@ public: template void match(Fn F) const { F(Type, Value); } - void printLeft(OutputStream &S) const override { + void printLeft(OutputBuffer &OB) const override { if (Type.size() > 3) { - S += "("; - S += Type; - S += ")"; + OB += "("; + OB += Type; + OB += ")"; } if (Value[0] == 'n') { - S += "-"; - S += Value.dropFront(1); + OB += "-"; + OB += Value.dropFront(1); } else - S += Value; + OB += Value; if (Type.size() <= 3) - S += Type; + OB += Type; } }; @@ -2158,7 +2300,7 @@ public: template void match(Fn F) const { F(Contents); } - void printLeft(OutputStream &s) const override { + void printLeft(OutputBuffer &OB) const override { const char *first = Contents.begin(); const char *last = Contents.end() + 1; @@ -2184,7 +2326,7 @@ public: #endif char num[FloatData::max_demangled_size] = {0}; int n = snprintf(num, sizeof(num), FloatData::spec, value); - s += StringView(num, num + n); + OB += StringView(num, num + n); } } }; @@ -2217,125 +2359,6 @@ FOR_EACH_NODE_KIND(SPECIALIZATION) #undef FOR_EACH_NODE_KIND -template -class PODSmallVector { - static_assert(std::is_pod::value, - "T is required to be a plain old data type"); - - T* First = nullptr; - T* Last = nullptr; - T* Cap = nullptr; - T Inline[N] = {0}; - - bool isInline() const { return First == Inline; } - - void clearInline() { - First = Inline; - Last = Inline; - Cap = Inline + N; - } - - void reserve(size_t NewCap) { - size_t S = size(); - if (isInline()) { - auto* Tmp = static_cast(std::malloc(NewCap * sizeof(T))); - if (Tmp == nullptr) - std::terminate(); - std::copy(First, Last, Tmp); - First = Tmp; - } else { - First = static_cast(std::realloc(First, NewCap * sizeof(T))); - if (First == nullptr) - std::terminate(); - } - Last = First + S; - Cap = First + NewCap; - } - -public: - PODSmallVector() : First(Inline), Last(First), Cap(Inline + N) {} - - PODSmallVector(const PODSmallVector&) = delete; - PODSmallVector& operator=(const PODSmallVector&) = delete; - - PODSmallVector(PODSmallVector&& Other) : PODSmallVector() { - if (Other.isInline()) { - std::copy(Other.begin(), Other.end(), First); - Last = First + Other.size(); - Other.clear(); - return; - } - - First = Other.First; - Last = Other.Last; - Cap = Other.Cap; - Other.clearInline(); - } - - PODSmallVector& operator=(PODSmallVector&& Other) { - if (Other.isInline()) { - if (!isInline()) { - std::free(First); - clearInline(); - } - std::copy(Other.begin(), Other.end(), 
First); - Last = First + Other.size(); - Other.clear(); - return *this; - } - - if (isInline()) { - First = Other.First; - Last = Other.Last; - Cap = Other.Cap; - Other.clearInline(); - return *this; - } - - std::swap(First, Other.First); - std::swap(Last, Other.Last); - std::swap(Cap, Other.Cap); - Other.clear(); - return *this; - } - - void push_back(const T& Elem) { - if (Last == Cap) - reserve(size() * 2); - *Last++ = Elem; - } - - void pop_back() { - assert(Last != First && "Popping empty vector!"); - --Last; - } - - void dropBack(size_t Index) { - assert(Index <= size() && "dropBack() can't expand!"); - Last = First + Index; - } - - T* begin() { return First; } - T* end() { return Last; } - - bool empty() const { return First == Last; } - size_t size() const { return static_cast(Last - First); } - T& back() { - assert(Last != First && "Calling back() on empty vector!"); - return *(Last - 1); - } - T& operator[](size_t Index) { - assert(Index < size() && "Invalid access!"); - return *(begin() + Index); - } - void clear() { Last = First; } - - ~PODSmallVector() { - if (!isInline()) - std::free(First); - } -}; - template struct AbstractManglingParser { const char *First; const char *Last; @@ -3884,6 +3907,16 @@ Node *AbstractManglingParser::parseType() { case 'h': First += 2; return make("half"); + // ::= DF _ # ISO/IEC TS 18661 binary floating point (N bits) + case 'F': { + First += 2; + Node *DimensionNumber = make(parseNumber()); + if (!DimensionNumber) + return nullptr; + if (!consumeIf('_')) + return nullptr; + return make(DimensionNumber); + } // ::= Di # char32_t case 'i': First += 2; diff --git a/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h b/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h index 77446e9b0f07..46daa3885a06 100644 --- a/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h +++ b/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h @@ -21,11 +21,11 @@ namespace llvm { namespace itanium_demangle { -class OutputStream; +class OutputBuffer; } } -using llvm::itanium_demangle::OutputStream; +using llvm::itanium_demangle::OutputBuffer; using llvm::itanium_demangle::StringView; namespace llvm { @@ -80,6 +80,7 @@ enum OutputFlags { OF_NoAccessSpecifier = 4, OF_NoMemberType = 8, OF_NoReturnType = 16, + OF_NoVariableType = 32, }; // Types @@ -261,7 +262,7 @@ struct Node { NodeKind kind() const { return Kind; } - virtual void output(OutputStream &OS, OutputFlags Flags) const = 0; + virtual void output(OutputBuffer &OB, OutputFlags Flags) const = 0; std::string toString(OutputFlags Flags = OF_Default) const; @@ -300,12 +301,12 @@ struct SpecialTableSymbolNode; struct TypeNode : public Node { explicit TypeNode(NodeKind K) : Node(K) {} - virtual void outputPre(OutputStream &OS, OutputFlags Flags) const = 0; - virtual void outputPost(OutputStream &OS, OutputFlags Flags) const = 0; + virtual void outputPre(OutputBuffer &OB, OutputFlags Flags) const = 0; + virtual void outputPost(OutputBuffer &OB, OutputFlags Flags) const = 0; - void output(OutputStream &OS, OutputFlags Flags) const override { - outputPre(OS, Flags); - outputPost(OS, Flags); + void output(OutputBuffer &OB, OutputFlags Flags) const override { + outputPre(OB, Flags); + outputPost(OB, Flags); } Qualifiers Quals = Q_None; @@ -315,8 +316,8 @@ struct PrimitiveTypeNode : public TypeNode { explicit PrimitiveTypeNode(PrimitiveKind K) : TypeNode(NodeKind::PrimitiveType), PrimKind(K) {} - void outputPre(OutputStream &OS, OutputFlags Flags) const override; - void outputPost(OutputStream &OS, OutputFlags Flags) const 
override {} + void outputPre(OutputBuffer &OB, OutputFlags Flags) const override; + void outputPost(OutputBuffer &OB, OutputFlags Flags) const override {} PrimitiveKind PrimKind; }; @@ -325,8 +326,8 @@ struct FunctionSignatureNode : public TypeNode { explicit FunctionSignatureNode(NodeKind K) : TypeNode(K) {} FunctionSignatureNode() : TypeNode(NodeKind::FunctionSignature) {} - void outputPre(OutputStream &OS, OutputFlags Flags) const override; - void outputPost(OutputStream &OS, OutputFlags Flags) const override; + void outputPre(OutputBuffer &OB, OutputFlags Flags) const override; + void outputPost(OutputBuffer &OB, OutputFlags Flags) const override; // Valid if this FunctionTypeNode is the Pointee of a PointerType or // MemberPointerType. @@ -359,13 +360,13 @@ struct IdentifierNode : public Node { NodeArrayNode *TemplateParams = nullptr; protected: - void outputTemplateParameters(OutputStream &OS, OutputFlags Flags) const; + void outputTemplateParameters(OutputBuffer &OB, OutputFlags Flags) const; }; struct VcallThunkIdentifierNode : public IdentifierNode { VcallThunkIdentifierNode() : IdentifierNode(NodeKind::VcallThunkIdentifier) {} - void output(OutputStream &OS, OutputFlags Flags) const override; + void output(OutputBuffer &OB, OutputFlags Flags) const override; uint64_t OffsetInVTable = 0; }; @@ -374,7 +375,7 @@ struct DynamicStructorIdentifierNode : public IdentifierNode { DynamicStructorIdentifierNode() : IdentifierNode(NodeKind::DynamicStructorIdentifier) {} - void output(OutputStream &OS, OutputFlags Flags) const override; + void output(OutputBuffer &OB, OutputFlags Flags) const override; VariableSymbolNode *Variable = nullptr; QualifiedNameNode *Name = nullptr; @@ -384,7 +385,7 @@ struct DynamicStructorIdentifierNode : public IdentifierNode { struct NamedIdentifierNode : public IdentifierNode { NamedIdentifierNode() : IdentifierNode(NodeKind::NamedIdentifier) {} - void output(OutputStream &OS, OutputFlags Flags) const override; + void output(OutputBuffer &OB, OutputFlags Flags) const override; StringView Name; }; @@ -394,7 +395,7 @@ struct IntrinsicFunctionIdentifierNode : public IdentifierNode { : IdentifierNode(NodeKind::IntrinsicFunctionIdentifier), Operator(Operator) {} - void output(OutputStream &OS, OutputFlags Flags) const override; + void output(OutputBuffer &OB, OutputFlags Flags) const override; IntrinsicFunctionKind Operator; }; @@ -403,7 +404,7 @@ struct LiteralOperatorIdentifierNode : public IdentifierNode { LiteralOperatorIdentifierNode() : IdentifierNode(NodeKind::LiteralOperatorIdentifier) {} - void output(OutputStream &OS, OutputFlags Flags) const override; + void output(OutputBuffer &OB, OutputFlags Flags) const override; StringView Name; }; @@ -412,7 +413,7 @@ struct LocalStaticGuardIdentifierNode : public IdentifierNode { LocalStaticGuardIdentifierNode() : IdentifierNode(NodeKind::LocalStaticGuardIdentifier) {} - void output(OutputStream &OS, OutputFlags Flags) const override; + void output(OutputBuffer &OB, OutputFlags Flags) const override; bool IsThread = false; uint32_t ScopeIndex = 0; @@ -422,7 +423,7 @@ struct ConversionOperatorIdentifierNode : public IdentifierNode { ConversionOperatorIdentifierNode() : IdentifierNode(NodeKind::ConversionOperatorIdentifier) {} - void output(OutputStream &OS, OutputFlags Flags) const override; + void output(OutputBuffer &OB, OutputFlags Flags) const override; // The type that this operator converts too. 
TypeNode *TargetType = nullptr; @@ -434,7 +435,7 @@ struct StructorIdentifierNode : public IdentifierNode { : IdentifierNode(NodeKind::StructorIdentifier), IsDestructor(IsDestructor) {} - void output(OutputStream &OS, OutputFlags Flags) const override; + void output(OutputBuffer &OB, OutputFlags Flags) const override; // The name of the class that this is a structor of. IdentifierNode *Class = nullptr; @@ -444,8 +445,8 @@ struct StructorIdentifierNode : public IdentifierNode { struct ThunkSignatureNode : public FunctionSignatureNode { ThunkSignatureNode() : FunctionSignatureNode(NodeKind::ThunkSignature) {} - void outputPre(OutputStream &OS, OutputFlags Flags) const override; - void outputPost(OutputStream &OS, OutputFlags Flags) const override; + void outputPre(OutputBuffer &OB, OutputFlags Flags) const override; + void outputPost(OutputBuffer &OB, OutputFlags Flags) const override; struct ThisAdjustor { uint32_t StaticOffset = 0; @@ -459,8 +460,8 @@ struct ThunkSignatureNode : public FunctionSignatureNode { struct PointerTypeNode : public TypeNode { PointerTypeNode() : TypeNode(NodeKind::PointerType) {} - void outputPre(OutputStream &OS, OutputFlags Flags) const override; - void outputPost(OutputStream &OS, OutputFlags Flags) const override; + void outputPre(OutputBuffer &OB, OutputFlags Flags) const override; + void outputPost(OutputBuffer &OB, OutputFlags Flags) const override; // Is this a pointer, reference, or rvalue-reference? PointerAffinity Affinity = PointerAffinity::None; @@ -476,8 +477,8 @@ struct PointerTypeNode : public TypeNode { struct TagTypeNode : public TypeNode { explicit TagTypeNode(TagKind Tag) : TypeNode(NodeKind::TagType), Tag(Tag) {} - void outputPre(OutputStream &OS, OutputFlags Flags) const override; - void outputPost(OutputStream &OS, OutputFlags Flags) const override; + void outputPre(OutputBuffer &OB, OutputFlags Flags) const override; + void outputPost(OutputBuffer &OB, OutputFlags Flags) const override; QualifiedNameNode *QualifiedName = nullptr; TagKind Tag; @@ -486,11 +487,11 @@ struct TagTypeNode : public TypeNode { struct ArrayTypeNode : public TypeNode { ArrayTypeNode() : TypeNode(NodeKind::ArrayType) {} - void outputPre(OutputStream &OS, OutputFlags Flags) const override; - void outputPost(OutputStream &OS, OutputFlags Flags) const override; + void outputPre(OutputBuffer &OB, OutputFlags Flags) const override; + void outputPost(OutputBuffer &OB, OutputFlags Flags) const override; - void outputDimensionsImpl(OutputStream &OS, OutputFlags Flags) const; - void outputOneDimension(OutputStream &OS, OutputFlags Flags, Node *N) const; + void outputDimensionsImpl(OutputBuffer &OB, OutputFlags Flags) const; + void outputOneDimension(OutputBuffer &OB, OutputFlags Flags, Node *N) const; // A list of array dimensions. e.g. 
[3,4,5] in `int Foo[3][4][5]` NodeArrayNode *Dimensions = nullptr; @@ -501,14 +502,14 @@ struct ArrayTypeNode : public TypeNode { struct IntrinsicNode : public TypeNode { IntrinsicNode() : TypeNode(NodeKind::IntrinsicType) {} - void output(OutputStream &OS, OutputFlags Flags) const override {} + void output(OutputBuffer &OB, OutputFlags Flags) const override {} }; struct CustomTypeNode : public TypeNode { CustomTypeNode() : TypeNode(NodeKind::Custom) {} - void outputPre(OutputStream &OS, OutputFlags Flags) const override; - void outputPost(OutputStream &OS, OutputFlags Flags) const override; + void outputPre(OutputBuffer &OB, OutputFlags Flags) const override; + void outputPost(OutputBuffer &OB, OutputFlags Flags) const override; IdentifierNode *Identifier = nullptr; }; @@ -516,9 +517,9 @@ struct CustomTypeNode : public TypeNode { struct NodeArrayNode : public Node { NodeArrayNode() : Node(NodeKind::NodeArray) {} - void output(OutputStream &OS, OutputFlags Flags) const override; + void output(OutputBuffer &OB, OutputFlags Flags) const override; - void output(OutputStream &OS, OutputFlags Flags, StringView Separator) const; + void output(OutputBuffer &OB, OutputFlags Flags, StringView Separator) const; Node **Nodes = nullptr; size_t Count = 0; @@ -527,7 +528,7 @@ struct NodeArrayNode : public Node { struct QualifiedNameNode : public Node { QualifiedNameNode() : Node(NodeKind::QualifiedName) {} - void output(OutputStream &OS, OutputFlags Flags) const override; + void output(OutputBuffer &OB, OutputFlags Flags) const override; NodeArrayNode *Components = nullptr; @@ -541,7 +542,7 @@ struct TemplateParameterReferenceNode : public Node { TemplateParameterReferenceNode() : Node(NodeKind::TemplateParameterReference) {} - void output(OutputStream &OS, OutputFlags Flags) const override; + void output(OutputBuffer &OB, OutputFlags Flags) const override; SymbolNode *Symbol = nullptr; @@ -556,7 +557,7 @@ struct IntegerLiteralNode : public Node { IntegerLiteralNode(uint64_t Value, bool IsNegative) : Node(NodeKind::IntegerLiteral), Value(Value), IsNegative(IsNegative) {} - void output(OutputStream &OS, OutputFlags Flags) const override; + void output(OutputBuffer &OB, OutputFlags Flags) const override; uint64_t Value = 0; bool IsNegative = false; @@ -566,7 +567,7 @@ struct RttiBaseClassDescriptorNode : public IdentifierNode { RttiBaseClassDescriptorNode() : IdentifierNode(NodeKind::RttiBaseClassDescriptor) {} - void output(OutputStream &OS, OutputFlags Flags) const override; + void output(OutputBuffer &OB, OutputFlags Flags) const override; uint32_t NVOffset = 0; int32_t VBPtrOffset = 0; @@ -576,7 +577,7 @@ struct RttiBaseClassDescriptorNode : public IdentifierNode { struct SymbolNode : public Node { explicit SymbolNode(NodeKind K) : Node(K) {} - void output(OutputStream &OS, OutputFlags Flags) const override; + void output(OutputBuffer &OB, OutputFlags Flags) const override; QualifiedNameNode *Name = nullptr; }; @@ -584,7 +585,7 @@ struct SpecialTableSymbolNode : public SymbolNode { explicit SpecialTableSymbolNode() : SymbolNode(NodeKind::SpecialTableSymbol) {} - void output(OutputStream &OS, OutputFlags Flags) const override; + void output(OutputBuffer &OB, OutputFlags Flags) const override; QualifiedNameNode *TargetName = nullptr; Qualifiers Quals = Qualifiers::Q_None; }; @@ -593,7 +594,7 @@ struct LocalStaticGuardVariableNode : public SymbolNode { LocalStaticGuardVariableNode() : SymbolNode(NodeKind::LocalStaticGuardVariable) {} - void output(OutputStream &OS, OutputFlags Flags) const override; + 
void output(OutputBuffer &OB, OutputFlags Flags) const override; bool IsVisible = false; }; @@ -601,7 +602,7 @@ struct LocalStaticGuardVariableNode : public SymbolNode { struct EncodedStringLiteralNode : public SymbolNode { EncodedStringLiteralNode() : SymbolNode(NodeKind::EncodedStringLiteral) {} - void output(OutputStream &OS, OutputFlags Flags) const override; + void output(OutputBuffer &OB, OutputFlags Flags) const override; StringView DecodedString; bool IsTruncated = false; @@ -611,7 +612,7 @@ struct EncodedStringLiteralNode : public SymbolNode { struct VariableSymbolNode : public SymbolNode { VariableSymbolNode() : SymbolNode(NodeKind::VariableSymbol) {} - void output(OutputStream &OS, OutputFlags Flags) const override; + void output(OutputBuffer &OB, OutputFlags Flags) const override; StorageClass SC = StorageClass::None; TypeNode *Type = nullptr; @@ -620,7 +621,7 @@ struct VariableSymbolNode : public SymbolNode { struct FunctionSymbolNode : public SymbolNode { FunctionSymbolNode() : SymbolNode(NodeKind::FunctionSymbol) {} - void output(OutputStream &OS, OutputFlags Flags) const override; + void output(OutputBuffer &OB, OutputFlags Flags) const override; FunctionSignatureNode *Signature = nullptr; }; diff --git a/llvm/include/llvm/Demangle/Utility.h b/llvm/include/llvm/Demangle/Utility.h index 04ff65a35aed..4fea9351a4bf 100644 --- a/llvm/include/llvm/Demangle/Utility.h +++ b/llvm/include/llvm/Demangle/Utility.h @@ -24,7 +24,7 @@ DEMANGLE_NAMESPACE_BEGIN // Stream that AST nodes write their string representation into after the AST // has been parsed. -class OutputStream { +class OutputBuffer { char *Buffer = nullptr; size_t CurrentPosition = 0; size_t BufferCapacity = 0; @@ -63,9 +63,9 @@ class OutputStream { } public: - OutputStream(char *StartBuf, size_t Size) + OutputBuffer(char *StartBuf, size_t Size) : Buffer(StartBuf), CurrentPosition(0), BufferCapacity(Size) {} - OutputStream() = default; + OutputBuffer() = default; void reset(char *Buffer_, size_t BufferCapacity_) { CurrentPosition = 0; Buffer = Buffer_; @@ -77,7 +77,7 @@ public: unsigned CurrentPackIndex = std::numeric_limits::max(); unsigned CurrentPackMax = std::numeric_limits::max(); - OutputStream &operator+=(StringView R) { + OutputBuffer &operator+=(StringView R) { size_t Size = R.size(); if (Size == 0) return *this; @@ -87,17 +87,28 @@ public: return *this; } - OutputStream &operator+=(char C) { + OutputBuffer &operator+=(char C) { grow(1); Buffer[CurrentPosition++] = C; return *this; } - OutputStream &operator<<(StringView R) { return (*this += R); } + OutputBuffer &operator<<(StringView R) { return (*this += R); } - OutputStream &operator<<(char C) { return (*this += C); } + OutputBuffer prepend(StringView R) { + size_t Size = R.size(); + + grow(Size); + std::memmove(Buffer + Size, Buffer, CurrentPosition); + std::memcpy(Buffer, R.begin(), Size); + CurrentPosition += Size; - OutputStream &operator<<(long long N) { + return *this; + } + + OutputBuffer &operator<<(char C) { return (*this += C); } + + OutputBuffer &operator<<(long long N) { if (N < 0) writeUnsigned(static_cast(-N), true); else @@ -105,27 +116,37 @@ public: return *this; } - OutputStream &operator<<(unsigned long long N) { + OutputBuffer &operator<<(unsigned long long N) { writeUnsigned(N, false); return *this; } - OutputStream &operator<<(long N) { + OutputBuffer &operator<<(long N) { return this->operator<<(static_cast(N)); } - OutputStream &operator<<(unsigned long N) { + OutputBuffer &operator<<(unsigned long N) { return 
this->operator<<(static_cast(N)); } - OutputStream &operator<<(int N) { + OutputBuffer &operator<<(int N) { return this->operator<<(static_cast(N)); } - OutputStream &operator<<(unsigned int N) { + OutputBuffer &operator<<(unsigned int N) { return this->operator<<(static_cast(N)); } + void insert(size_t Pos, const char *S, size_t N) { + assert(Pos <= CurrentPosition); + if (N == 0) + return; + grow(N); + std::memmove(Buffer + Pos + N, Buffer + Pos, CurrentPosition - Pos); + std::memcpy(Buffer + Pos, S, N); + CurrentPosition += N; + } + size_t getCurrentPosition() const { return CurrentPosition; } void setCurrentPosition(size_t NewPos) { CurrentPosition = NewPos; } @@ -171,7 +192,7 @@ public: SwapAndRestore &operator=(const SwapAndRestore &) = delete; }; -inline bool initializeOutputStream(char *Buf, size_t *N, OutputStream &S, +inline bool initializeOutputBuffer(char *Buf, size_t *N, OutputBuffer &OB, size_t InitSize) { size_t BufferSize; if (Buf == nullptr) { @@ -182,7 +203,7 @@ inline bool initializeOutputStream(char *Buf, size_t *N, OutputStream &S, } else BufferSize = *N; - S.reset(Buf, BufferSize); + OB.reset(Buf, BufferSize); return true; } diff --git a/llvm/include/llvm/ExecutionEngine/ExecutionEngine.h b/llvm/include/llvm/ExecutionEngine/ExecutionEngine.h index 2e386518f0bf..43c91fb5f988 100644 --- a/llvm/include/llvm/ExecutionEngine/ExecutionEngine.h +++ b/llvm/include/llvm/ExecutionEngine/ExecutionEngine.h @@ -21,7 +21,6 @@ #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/ExecutionEngine/JITSymbol.h" -#include "llvm/ExecutionEngine/OrcV1Deprecation.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Module.h" #include "llvm/Object/Binary.h" diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/ELF_aarch64.h b/llvm/include/llvm/ExecutionEngine/JITLink/ELF_aarch64.h new file mode 100644 index 000000000000..50eb598139ea --- /dev/null +++ b/llvm/include/llvm/ExecutionEngine/JITLink/ELF_aarch64.h @@ -0,0 +1,39 @@ +//===--- ELF_aarch64.h - JIT link functions for ELF/aarch64 --*- C++ -*----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +//===----------------------------------------------------------------------===// +// +// jit-link functions for ELF/aarch64. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_JITLINK_ELF_AARCH64_H +#define LLVM_EXECUTIONENGINE_JITLINK_ELF_AARCH64_H + +#include "llvm/ExecutionEngine/JITLink/JITLink.h" + +namespace llvm { +namespace jitlink { + +/// Create a LinkGraph from an ELF/aarch64 relocatable object +/// +/// Note: The graph does not take ownership of the underlying buffer, nor copy +/// its contents. The caller is responsible for ensuring that the object buffer +/// outlives the graph. +Expected> +createLinkGraphFromELFObject_aarch64(MemoryBufferRef ObjectBuffer); + +/// jit-link the given object buffer, which must be a ELF aarch64 relocatable +/// object file. 
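// Illustrative sketch only (not part of this change): the ELF/aarch64 entry
// points declared in this header follow the same pattern as the other ELF_*
// backends. The JITLinkContext instance is assumed to be supplied by the
// caller; error handling is minimal.
inline Error exampleLinkAArch64Object(MemoryBufferRef ObjBuffer,
                                      std::unique_ptr<JITLinkContext> Ctx) {
  auto G = createLinkGraphFromELFObject_aarch64(ObjBuffer);
  if (!G)
    return G.takeError();
  // Ownership of the graph and the context passes to the asynchronous link.
  link_ELF_aarch64(std::move(*G), std::move(Ctx));
  return Error::success();
}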
+void link_ELF_aarch64(std::unique_ptr G, + std::unique_ptr Ctx); + +} // end namespace jitlink +} // end namespace llvm + +#endif // LLVM_EXECUTIONENGINE_JITLINK_ELF_AARCH64_H diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/ELF_riscv.h b/llvm/include/llvm/ExecutionEngine/JITLink/ELF_riscv.h index 1339ab51cbb9..5a8b186a2c3e 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/ELF_riscv.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/ELF_riscv.h @@ -35,4 +35,4 @@ void link_ELF_riscv(std::unique_ptr G, } // end namespace jitlink } // end namespace llvm -#endif // LLVM_EXECUTIONENGINE_JITLINK_ELF_RISCV64_H +#endif // LLVM_EXECUTIONENGINE_JITLINK_ELF_RISCV_H diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/ELF_x86_64.h b/llvm/include/llvm/ExecutionEngine/JITLink/ELF_x86_64.h index d8ed953363e6..f5fa9e96c594 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/ELF_x86_64.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/ELF_x86_64.h @@ -21,29 +21,17 @@ namespace jitlink { namespace ELF_x86_64_Edges { enum ELFX86RelocationKind : Edge::Kind { Branch32 = Edge::FirstRelocation, - Branch32ToStub, - Pointer32, + Pointer32Signed, Pointer64, - Pointer64Anon, PCRel32, - PCRel64, - PCRel32Minus1, - PCRel32Minus2, - PCRel32Minus4, - PCRel32Anon, - PCRel32Minus1Anon, - PCRel32Minus2Anon, - PCRel32Minus4Anon, PCRel32GOTLoad, - PCRel32GOT, + PCRel32GOTLoadRelaxable, + PCRel32REXGOTLoadRelaxable, + PCRel32TLV, PCRel64GOT, GOTOFF64, GOT64, - PCRel32TLV, - Delta32, Delta64, - NegDelta32, - NegDelta64, }; } // end namespace ELF_x86_64_Edges diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h b/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h index 6162a675ec12..83d85953fce6 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h @@ -13,19 +13,19 @@ #ifndef LLVM_EXECUTIONENGINE_JITLINK_JITLINK_H #define LLVM_EXECUTIONENGINE_JITLINK_JITLINK_H -#include "JITLinkMemoryManager.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Triple.h" +#include "llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h" +#include "llvm/ExecutionEngine/JITLink/MemoryFlags.h" #include "llvm/ExecutionEngine/JITSymbol.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/Memory.h" #include "llvm/Support/MemoryBuffer.h" #include @@ -225,7 +225,7 @@ public: /// Get the content for this block. Block must not be a zero-fill block. ArrayRef getContent() const { - assert(Data && "Section does not contain content"); + assert(Data && "Block does not contain content"); return ArrayRef(Data, Size); } @@ -233,6 +233,7 @@ public: /// Caller is responsible for ensuring the underlying bytes are not /// deallocated while pointed to by this block. void setContent(ArrayRef Content) { + assert(Content.data() && "Setting null content"); Data = Content.data(); Size = Content.size(); ContentMutable = false; @@ -251,6 +252,7 @@ public: /// to call this on a block with immutable content -- consider using /// getMutableContent instead. 
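// Illustrative sketch only (not part of this change), contrasting the two
// mutable-content accessors documented above: getMutableContent(G) may first
// copy an immutable block's bytes into graph-owned storage, whereas the
// accessor below asserts that the content is already mutable.
inline void exampleTouchBlockContent(LinkGraph &G, Block &B) {
  MutableArrayRef<char> Bytes = B.getMutableContent(G); // safe on immutable content
  if (!Bytes.empty())
    Bytes[0] = 0; // illustrative write only
}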
MutableArrayRef getAlreadyMutableContent() { + assert(Data && "Block does not contain content"); assert(ContentMutable && "Content is not mutable"); return MutableArrayRef(const_cast(Data), Size); } @@ -260,6 +262,7 @@ public: /// The caller is responsible for ensuring that the memory pointed to by /// MutableContent is not deallocated while pointed to by this block. void setMutableContent(MutableArrayRef MutableContent) { + assert(MutableContent.data() && "Setting null content"); Data = MutableContent.data(); Size = MutableContent.size(); ContentMutable = true; @@ -295,6 +298,7 @@ public: /// Add an edge to this block. void addEdge(Edge::Kind K, Edge::OffsetT Offset, Symbol &Target, Edge::AddendT Addend) { + assert(!isZeroFill() && "Adding edge to zero-fill block?"); Edges.push_back(Edge(K, Offset, Target, Addend)); } @@ -339,6 +343,12 @@ private: std::vector Edges; }; +// Align a JITTargetAddress to conform with block alignment requirements. +inline JITTargetAddress alignToBlock(JITTargetAddress Addr, Block &B) { + uint64_t Delta = (B.getAlignmentOffset() - Addr) % B.getAlignment(); + return Addr + Delta; +} + /// Describes symbol linkage. This can be used to make resolve definition /// clashes. enum class Linkage : uint8_t { @@ -640,8 +650,7 @@ class Section { friend class LinkGraph; private: - Section(StringRef Name, sys::Memory::ProtectionFlags Prot, - SectionOrdinal SecOrdinal) + Section(StringRef Name, MemProt Prot, SectionOrdinal SecOrdinal) : Name(Name), Prot(Prot), SecOrdinal(SecOrdinal) {} using SymbolSet = DenseSet; @@ -666,12 +675,16 @@ public: StringRef getName() const { return Name; } /// Returns the protection flags for this section. - sys::Memory::ProtectionFlags getProtectionFlags() const { return Prot; } + MemProt getMemProt() const { return Prot; } /// Set the protection flags for this section. - void setProtectionFlags(sys::Memory::ProtectionFlags Prot) { - this->Prot = Prot; - } + void setMemProt(MemProt Prot) { this->Prot = Prot; } + + /// Get the deallocation policy for this section. + MemDeallocPolicy getMemDeallocPolicy() const { return MDP; } + + /// Set the deallocation policy for this section. + void setMemDeallocPolicy(MemDeallocPolicy MDP) { this->MDP = MDP; } /// Returns the ordinal for this section. SectionOrdinal getOrdinal() const { return SecOrdinal; } @@ -686,6 +699,7 @@ public: return make_range(Blocks.begin(), Blocks.end()); } + /// Returns the number of blocks in this section. BlockSet::size_type blocks_size() const { return Blocks.size(); } /// Returns an iterator over the symbols defined in this section. @@ -734,7 +748,8 @@ private: } StringRef Name; - sys::Memory::ProtectionFlags Prot; + MemProt Prot; + MemDeallocPolicy MDP = MemDeallocPolicy::Standard; SectionOrdinal SecOrdinal = 0; BlockSet Blocks; SymbolSet Symbols; @@ -916,6 +931,11 @@ public: : Name(std::move(Name)), TT(TT), PointerSize(PointerSize), Endianness(Endianness), GetEdgeKindName(std::move(GetEdgeKindName)) {} + LinkGraph(const LinkGraph &) = delete; + LinkGraph &operator=(const LinkGraph &) = delete; + LinkGraph(LinkGraph &&) = delete; + LinkGraph &operator=(LinkGraph &&) = delete; + /// Returns the name of this graph (usually the name of the original /// underlying MemoryBuffer). const std::string &getName() const { return Name; } @@ -962,7 +982,7 @@ public: } /// Create a section with the given name, protection flags, and alignment. 
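// Illustrative sketch only (not part of this change) of the MemProt-based
// section API that replaces sys::Memory::ProtectionFlags in this patch.
// Section names are placeholders.
inline void exampleCreateSections(LinkGraph &G) {
  Section &Text = G.createSection("__text", MemProt::Read | MemProt::Exec);
  // A section that is only needed while finalize actions run can be marked
  // for early reclamation (see MemDeallocPolicy in MemoryFlags.h).
  Section &InitMetadata =
      G.createSection("__init_md", MemProt::Read | MemProt::Write);
  InitMetadata.setMemDeallocPolicy(MemDeallocPolicy::Finalize);
  (void)Text;
}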
- Section &createSection(StringRef Name, sys::Memory::ProtectionFlags Prot) { + Section &createSection(StringRef Name, MemProt Prot) { assert(llvm::find_if(Sections, [&](std::unique_ptr
&Sec) { return Sec->getName() == Name; @@ -1100,10 +1120,10 @@ public: Symbol &addDefinedSymbol(Block &Content, JITTargetAddress Offset, StringRef Name, JITTargetAddress Size, Linkage L, Scope S, bool IsCallable, bool IsLive) { - assert(llvm::count_if(defined_symbols(), - [&](const Symbol *Sym) { - return Sym->getName() == Name; - }) == 0 && + assert((S == Scope::Local || llvm::count_if(defined_symbols(), + [&](const Symbol *Sym) { + return Sym->getName() == Name; + }) == 0) && "Duplicate defined symbol"); auto &Sym = Symbol::constructNamedDef(Allocator.Allocate(), Content, Offset, @@ -1237,6 +1257,7 @@ public: void transferDefinedSymbol(Symbol &Sym, Block &DestBlock, JITTargetAddress NewOffset, Optional ExplicitNewSize) { + auto &OldSection = Sym.getBlock().getSection(); Sym.setBlock(DestBlock); Sym.setOffset(NewOffset); if (ExplicitNewSize) @@ -1246,6 +1267,10 @@ public: if (Sym.getSize() > RemainingBlockSize) Sym.setSize(RemainingBlockSize); } + if (&DestBlock.getSection() != &OldSection) { + OldSection.removeSymbol(Sym); + DestBlock.getSection().addSymbol(Sym); + } } /// Transfers the given Block and all Symbols pointing to it to the given @@ -1280,6 +1305,8 @@ public: bool PreserveSrcSection = false) { if (&DstSection == &SrcSection) return; + for (auto *B : SrcSection.blocks()) + B->setSection(DstSection); SrcSection.transferContentTo(DstSection); if (!PreserveSrcSection) removeSection(SrcSection); @@ -1345,6 +1372,13 @@ public: Sections.erase(I); } + /// Accessor for the AllocActions object for this graph. This can be used to + /// register allocation action calls prior to finalization. + /// + /// Accessing this object after finalization will result in undefined + /// behavior. + JITLinkMemoryManager::AllocActions &allocActions() { return AAs; } + /// Dump the graph. void dump(raw_ostream &OS); @@ -1361,6 +1395,7 @@ private: SectionList Sections; ExternalSymbolSet ExternalSymbols; ExternalSymbolSet AbsoluteSymbols; + JITLinkMemoryManager::AllocActions AAs; }; inline MutableArrayRef Block::getMutableContent(LinkGraph &G) { @@ -1650,8 +1685,7 @@ public: /// finalized (i.e. emitted to memory and memory permissions set). If all of /// this objects dependencies have also been finalized then the code is ready /// to run. - virtual void - notifyFinalized(std::unique_ptr A) = 0; + virtual void notifyFinalized(JITLinkMemoryManager::FinalizedAlloc Alloc) = 0; /// Called by JITLink prior to linking to determine whether default passes for /// the target should be added. The default implementation returns true. @@ -1683,6 +1717,36 @@ Error markAllSymbolsLive(LinkGraph &G); Error makeTargetOutOfRangeError(const LinkGraph &G, const Block &B, const Edge &E); +/// Base case for edge-visitors where the visitor-list is empty. +inline void visitEdge(LinkGraph &G, Block *B, Edge &E) {} + +/// Applies the first visitor in the list to the given edge. If the visitor's +/// visitEdge method returns true then we return immediately, otherwise we +/// apply the next visitor. +template +void visitEdge(LinkGraph &G, Block *B, Edge &E, VisitorT &&V, + VisitorTs &&...Vs) { + if (!V.visitEdge(G, B, E)) + visitEdge(G, B, E, std::forward(Vs)...); +} + +/// For each edge in the given graph, apply a list of visitors to the edge, +/// stopping when the first visitor's visitEdge method returns true. +/// +/// Only visits edges that were in the graph at call time: if any visitor +/// adds new edges those will not be visited. 
Visitors are not allowed to +/// remove edges (though they can change their kind, target, and addend). +template +void visitExistingEdges(LinkGraph &G, VisitorTs &&...Vs) { + // We may add new blocks during this process, but we don't want to iterate + // over them, so build a worklist. + std::vector Worklist(G.blocks().begin(), G.blocks().end()); + + for (auto *B : Worklist) + for (auto &E : B->edges()) + visitEdge(G, B, E, std::forward(Vs)...); +} + /// Create a LinkGraph from the given object buffer. /// /// Note: The graph does not take ownership of the underlying buffer, nor copy diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h b/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h index cee7d6b09c48..62c271dfc0b2 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h @@ -13,106 +13,416 @@ #ifndef LLVM_EXECUTIONENGINE_JITLINK_JITLINKMEMORYMANAGER_H #define LLVM_EXECUTIONENGINE_JITLINK_JITLINKMEMORYMANAGER_H -#include "llvm/ADT/DenseMap.h" #include "llvm/ExecutionEngine/JITLink/JITLinkDylib.h" +#include "llvm/ExecutionEngine/JITLink/MemoryFlags.h" #include "llvm/ExecutionEngine/JITSymbol.h" +#include "llvm/Support/Allocator.h" #include "llvm/Support/Error.h" #include "llvm/Support/MSVCErrorWorkarounds.h" #include "llvm/Support/Memory.h" +#include "llvm/Support/RecyclingAllocator.h" #include #include +#include namespace llvm { namespace jitlink { +class Block; +class LinkGraph; +class Section; + /// Manages allocations of JIT memory. /// /// Instances of this class may be accessed concurrently from multiple threads /// and their implemetations should include any necessary synchronization. class JITLinkMemoryManager { public: - using ProtectionFlags = sys::Memory::ProtectionFlags; + /// Represents a call to a graph-memory-management support function in the + /// executor. + /// + /// Support functions are called as: + /// + /// auto *Result = + /// ((char*(*)(const void*, size_t))FnAddr)( + /// (const void*)CtxAddr, (size_t)CtxSize) + /// + /// A null result is interpreted as success. + /// + /// A non-null result is interpreted as a heap-allocated string containing + /// an error message to report to the allocator (the allocator's + /// executor-side implementation code is responsible for freeing the error + /// string). + struct AllocActionCall { + JITTargetAddress FnAddr = 0; + JITTargetAddress CtxAddr = 0; + JITTargetAddress CtxSize = 0; + }; + + /// A pair of AllocActionCalls, one to be run at finalization time, one to be + /// run at deallocation time. + /// + /// AllocActionCallPairs should be constructed for paired operations (e.g. + /// __register_ehframe and __deregister_ehframe for eh-frame registration). + /// See comments for AllocActions for execution ordering. + /// + /// For unpaired operations one or the other member can be left unused, as + /// AllocationActionCalls with an FnAddr of zero will be skipped. + struct AllocActionCallPair { + AllocActionCall Finalize; + AllocActionCall Dealloc; + }; + + /// A vector of allocation actions to be run for this allocation. + /// + /// Finalize allocations will be run in order at finalize time. Dealloc + /// actions will be run in reverse order at deallocation time. + using AllocActions = std::vector; + + /// Represents a finalized allocation. + /// + /// Finalized allocations must be passed to the + /// JITLinkMemoryManager:deallocate method prior to being destroyed. 
+ /// + /// The interpretation of the Address associated with the finalized allocation + /// is up to the memory manager implementation. Common options are using the + /// base address of the allocation, or the address of a memory management + /// object that tracks the allocation. + class FinalizedAlloc { + friend class JITLinkMemoryManager; - class SegmentRequest { public: - SegmentRequest() = default; - SegmentRequest(uint64_t Alignment, size_t ContentSize, - uint64_t ZeroFillSize) - : Alignment(Alignment), ContentSize(ContentSize), - ZeroFillSize(ZeroFillSize) { - assert(isPowerOf2_32(Alignment) && "Alignment must be power of 2"); + static constexpr JITTargetAddress InvalidAddr = ~JITTargetAddress(0); + + FinalizedAlloc() = default; + explicit FinalizedAlloc(JITTargetAddress A) : A(A) { + assert(A != 0 && "Explicitly creating an invalid allocation?"); + } + FinalizedAlloc(const FinalizedAlloc &) = delete; + FinalizedAlloc(FinalizedAlloc &&Other) : A(Other.A) { + Other.A = InvalidAddr; + } + FinalizedAlloc &operator=(const FinalizedAlloc &) = delete; + FinalizedAlloc &operator=(FinalizedAlloc &&Other) { + assert(A == InvalidAddr && + "Cannot overwrite active finalized allocation"); + std::swap(A, Other.A); + return *this; + } + ~FinalizedAlloc() { + assert(A == InvalidAddr && "Finalized allocation was not deallocated"); + } + + /// FinalizedAllocs convert to false for default-constructed, and + /// true otherwise. Default-constructed allocs need not be deallocated. + explicit operator bool() const { return A != InvalidAddr; } + + /// Returns the address associated with this finalized allocation. + /// The allocation is unmodified. + JITTargetAddress getAddress() const { return A; } + + /// Returns the address associated with this finalized allocation and + /// resets this object to the default state. + /// This should only be used by allocators when deallocating memory. + JITTargetAddress release() { + JITTargetAddress Tmp = A; + A = InvalidAddr; + return Tmp; } - uint64_t getAlignment() const { return Alignment; } - size_t getContentSize() const { return ContentSize; } - uint64_t getZeroFillSize() const { return ZeroFillSize; } + private: - uint64_t Alignment = 0; - size_t ContentSize = 0; - uint64_t ZeroFillSize = 0; + JITTargetAddress A = InvalidAddr; }; - using SegmentsRequestMap = DenseMap; - - /// Represents an allocation created by the memory manager. + /// Represents an allocation which has not been finalized yet. /// - /// An allocation object is responsible for allocating and owning jit-linker - /// working and target memory, and for transfering from working to target - /// memory. + /// InFlightAllocs manage both executor memory allocations and working + /// memory allocations. /// - class Allocation { + /// On finalization, the InFlightAlloc should transfer the content of + /// working memory into executor memory, apply memory protections, and + /// run any finalization functions. + /// + /// Working memory should be kept alive at least until one of the following + /// happens: (1) the InFlightAlloc instance is destroyed, (2) the + /// InFlightAlloc is abandoned, (3) finalized target memory is destroyed. + /// + /// If abandon is called then working memory and executor memory should both + /// be freed. 
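// Illustrative sketch only (not part of this change) of the synchronous
// version of the allocate -> finalize -> deallocate flow described above.
// A real client would normally use the asynchronous overloads; JD is left
// null here, meaning the allocation is not tied to a particular JITLinkDylib.
inline Error exampleAllocateAndRun(JITLinkMemoryManager &MemMgr, LinkGraph &G) {
  auto Alloc = MemMgr.allocate(/*JD=*/nullptr, G); // blocking convenience overload
  if (!Alloc)
    return Alloc.takeError();
  // ... populate working memory for each block here ...
  auto FA = (*Alloc)->finalize(); // transfers content, applies protections,
                                  // and runs finalize actions
  if (!FA)
    return FA.takeError();
  // The FinalizedAlloc must be handed back before it is destroyed.
  return MemMgr.deallocate(std::move(*FA));
}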
+ class InFlightAlloc { public: - using FinalizeContinuation = std::function; - - virtual ~Allocation(); + using OnFinalizedFunction = unique_function)>; + using OnAbandonedFunction = unique_function; - /// Should return the address of linker working memory for the segment with - /// the given protection flags. - virtual MutableArrayRef getWorkingMemory(ProtectionFlags Seg) = 0; + virtual ~InFlightAlloc(); - /// Should return the final address in the target process where the segment - /// will reside. - virtual JITTargetAddress getTargetMemory(ProtectionFlags Seg) = 0; + /// Called prior to finalization if the allocation should be abandoned. + virtual void abandon(OnAbandonedFunction OnAbandoned) = 0; - /// Should transfer from working memory to target memory, and release - /// working memory. - virtual void finalizeAsync(FinalizeContinuation OnFinalize) = 0; + /// Called to transfer working memory to the target and apply finalization. + virtual void finalize(OnFinalizedFunction OnFinalized) = 0; - /// Calls finalizeAsync and waits for completion. - Error finalize() { - std::promise FinalizeResultP; + /// Synchronous convenience version of finalize. + Expected finalize() { + std::promise> FinalizeResultP; auto FinalizeResultF = FinalizeResultP.get_future(); - finalizeAsync( - [&](Error Err) { FinalizeResultP.set_value(std::move(Err)); }); + finalize([&](Expected Result) { + FinalizeResultP.set_value(std::move(Result)); + }); return FinalizeResultF.get(); } - - /// Should deallocate target memory. - virtual Error deallocate() = 0; }; + /// Typedef for the argument to be passed to OnAllocatedFunction. + using AllocResult = Expected>; + + /// Called when allocation has been completed. + using OnAllocatedFunction = unique_function; + + /// Called when deallocation has completed. + using OnDeallocatedFunction = unique_function; + virtual ~JITLinkMemoryManager(); - /// Create an Allocation object. + /// Start the allocation process. /// - /// The JD argument represents the target JITLinkDylib, and can be used by - /// JITLinkMemoryManager implementers to manage per-dylib allocation pools - /// (e.g. one pre-reserved address space slab per dylib to ensure that all - /// allocations for the dylib are within a certain range). The JD argument - /// may be null (representing an allocation not associated with any - /// JITDylib. + /// If the initial allocation is successful then the OnAllocated function will + /// be called with a std::unique_ptr value. If the assocation + /// is unsuccessful then the OnAllocated function will be called with an + /// Error. + virtual void allocate(const JITLinkDylib *JD, LinkGraph &G, + OnAllocatedFunction OnAllocated) = 0; + + /// Convenience function for blocking allocation. + AllocResult allocate(const JITLinkDylib *JD, LinkGraph &G) { + std::promise>> AllocResultP; + auto AllocResultF = AllocResultP.get_future(); + allocate(JD, G, [&](AllocResult Alloc) { + AllocResultP.set_value(std::move(Alloc)); + }); + return AllocResultF.get(); + } + + /// Deallocate a list of allocation objects. /// - /// The request argument describes the segment sizes and permisssions being - /// requested. - virtual Expected> - allocate(const JITLinkDylib *JD, const SegmentsRequestMap &Request) = 0; + /// Dealloc actions will be run in reverse order (from the end of the vector + /// to the start). + virtual void deallocate(std::vector Allocs, + OnDeallocatedFunction OnDeallocated) = 0; + + /// Convenience function for deallocation of a single alloc. 
+ void deallocate(FinalizedAlloc Alloc, OnDeallocatedFunction OnDeallocated) { + std::vector Allocs; + Allocs.push_back(std::move(Alloc)); + deallocate(std::move(Allocs), std::move(OnDeallocated)); + } + + /// Convenience function for blocking deallocation. + Error deallocate(std::vector Allocs) { + std::promise DeallocResultP; + auto DeallocResultF = DeallocResultP.get_future(); + deallocate(std::move(Allocs), + [&](Error Err) { DeallocResultP.set_value(std::move(Err)); }); + return DeallocResultF.get(); + } + + /// Convenience function for blocking deallocation of a single alloc. + Error deallocate(FinalizedAlloc Alloc) { + std::vector Allocs; + Allocs.push_back(std::move(Alloc)); + return deallocate(std::move(Allocs)); + } +}; + +/// BasicLayout simplifies the implementation of JITLinkMemoryManagers. +/// +/// BasicLayout groups Sections into Segments based on their memory protection +/// and deallocation policies. JITLinkMemoryManagers can construct a BasicLayout +/// from a Graph, and then assign working memory and addresses to each of the +/// Segments. These addreses will be mapped back onto the Graph blocks in +/// the apply method. +class BasicLayout { +public: + /// The Alignment, ContentSize and ZeroFillSize of each segment will be + /// pre-filled from the Graph. Clients must set the Addr and WorkingMem fields + /// prior to calling apply. + // + // FIXME: The C++98 initializer is an attempt to work around compile failures + // due to http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1397. + // We should be able to switch this back to member initialization once that + // issue is fixed. + class Segment { + friend class BasicLayout; + + public: + Segment() + : ContentSize(0), ZeroFillSize(0), Addr(0), WorkingMem(nullptr), + NextWorkingMemOffset(0) {} + Align Alignment; + size_t ContentSize; + uint64_t ZeroFillSize; + JITTargetAddress Addr; + char *WorkingMem = nullptr; + + private: + size_t NextWorkingMemOffset; + std::vector ContentBlocks, ZeroFillBlocks; + }; + + /// A convenience class that further groups segments based on memory + /// deallocation policy. This allows clients to make two slab allocations: + /// one for all standard segments, and one for all finalize segments. + struct ContiguousPageBasedLayoutSizes { + uint64_t StandardSegs = 0; + uint64_t FinalizeSegs = 0; + + uint64_t total() const { return StandardSegs + FinalizeSegs; } + }; + +private: + using SegmentMap = AllocGroupSmallMap; + +public: + BasicLayout(LinkGraph &G); + + /// Return a reference to the graph this allocation was created from. + LinkGraph &getGraph() { return G; } + + /// Returns the total number of required to allocate all segments (with each + /// segment padded out to page size) for all standard segments, and all + /// finalize segments. + /// + /// This is a convenience function for the common case where the segments will + /// be allocated contiguously. + /// + /// This function will return an error if any segment has an alignment that + /// is higher than a page. + Expected + getContiguousPageBasedLayoutSizes(uint64_t PageSize); + + /// Returns an iterator over the segments of the layout. + iterator_range segments() { + return {Segments.begin(), Segments.end()}; + } + + /// Apply the layout to the graph. + Error apply(); + + /// Returns a reference to the AllocActions in the graph. + /// This convenience function saves callers from having to #include + /// LinkGraph.h if all they need are allocation actions. 
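// Illustrative sketch only (not part of this change): registering a paired
// allocation action, e.g. eh-frame registration. The executor function
// addresses and the eh-frame address/size are assumed to be known to the
// caller; none of these names come from the patch.
inline void exampleAddEHFrameActions(BasicLayout &BL,
                                     JITTargetAddress RegisterFn,
                                     JITTargetAddress DeregisterFn,
                                     JITTargetAddress EHFrameAddr,
                                     uint64_t EHFrameSize) {
  JITLinkMemoryManager::AllocActionCallPair P;
  P.Finalize = {RegisterFn, EHFrameAddr, EHFrameSize};  // run at finalize time
  P.Dealloc = {DeregisterFn, EHFrameAddr, EHFrameSize}; // run, in reverse order, at dealloc time
  BL.graphAllocActions().push_back(std::move(P));
}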
+ JITLinkMemoryManager::AllocActions &graphAllocActions(); + +private: + LinkGraph &G; + SegmentMap Segments; +}; + +/// A utility class for making simple allocations using JITLinkMemoryManager. +/// +/// SimpleSegementAlloc takes a mapping of AllocGroups to Segments and uses +/// this to create a LinkGraph with one Section (containing one Block) per +/// Segment. Clients can obtain a pointer to the working memory and executor +/// address of that block using the Segment's AllocGroup. Once memory has been +/// populated, clients can call finalize to finalize the memory. +class SimpleSegmentAlloc { +public: + /// Describes a segment to be allocated. + struct Segment { + Segment() = default; + Segment(size_t ContentSize, Align ContentAlign) + : ContentSize(ContentSize), ContentAlign(ContentAlign) {} + + size_t ContentSize = 0; + Align ContentAlign; + }; + + /// Describes the segment working memory and executor address. + struct SegmentInfo { + JITTargetAddress Addr = 0; + MutableArrayRef WorkingMem; + }; + + using SegmentMap = AllocGroupSmallMap; + + using OnCreatedFunction = unique_function)>; + + using OnFinalizedFunction = + JITLinkMemoryManager::InFlightAlloc::OnFinalizedFunction; + + static void Create(JITLinkMemoryManager &MemMgr, const JITLinkDylib *JD, + SegmentMap Segments, OnCreatedFunction OnCreated); + + static Expected Create(JITLinkMemoryManager &MemMgr, + const JITLinkDylib *JD, + SegmentMap Segments); + + SimpleSegmentAlloc(SimpleSegmentAlloc &&); + SimpleSegmentAlloc &operator=(SimpleSegmentAlloc &&); + ~SimpleSegmentAlloc(); + + /// Returns the SegmentInfo for the given group. + SegmentInfo getSegInfo(AllocGroup AG); + + /// Finalize all groups (async version). + void finalize(OnFinalizedFunction OnFinalized) { + Alloc->finalize(std::move(OnFinalized)); + } + + /// Finalize all groups. + Expected finalize() { + return Alloc->finalize(); + } + +private: + SimpleSegmentAlloc( + std::unique_ptr G, AllocGroupSmallMap ContentBlocks, + std::unique_ptr Alloc); + + std::unique_ptr G; + AllocGroupSmallMap ContentBlocks; + std::unique_ptr Alloc; }; /// A JITLinkMemoryManager that allocates in-process memory. class InProcessMemoryManager : public JITLinkMemoryManager { public: - Expected> - allocate(const JITLinkDylib *JD, const SegmentsRequestMap &Request) override; + class IPInFlightAlloc; + + /// Attempts to auto-detect the host page size. + static Expected> Create(); + + /// Create an instance using the given page size. + InProcessMemoryManager(uint64_t PageSize) : PageSize(PageSize) {} + + void allocate(const JITLinkDylib *JD, LinkGraph &G, + OnAllocatedFunction OnAllocated) override; + + // Use overloads from base class. + using JITLinkMemoryManager::allocate; + + void deallocate(std::vector Alloc, + OnDeallocatedFunction OnDeallocated) override; + + // Use overloads from base class. + using JITLinkMemoryManager::deallocate; + +private: + // FIXME: Use an in-place array instead of a vector for DeallocActions. + // There shouldn't need to be a heap alloc for this. 
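// Illustrative sketch only (not part of this change) of the SimpleSegmentAlloc
// helper introduced above; it can be driven by any JITLinkMemoryManager,
// e.g. the in-process one declared below. The code buffer and alignment are
// placeholders.
inline Expected<JITLinkMemoryManager::FinalizedAlloc>
exampleEmitCode(JITLinkMemoryManager &MemMgr, ArrayRef<char> Code) {
  SimpleSegmentAlloc::SegmentMap Segs;
  Segs[MemProt::Read | MemProt::Exec] =
      SimpleSegmentAlloc::Segment(Code.size(), Align(16));
  auto Alloc = SimpleSegmentAlloc::Create(MemMgr, /*JD=*/nullptr, std::move(Segs));
  if (!Alloc)
    return Alloc.takeError();
  auto SegInfo = Alloc->getSegInfo(MemProt::Read | MemProt::Exec);
  std::copy(Code.begin(), Code.end(), SegInfo.WorkingMem.begin());
  return Alloc->finalize(); // yields the executor-side FinalizedAlloc
}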
+ struct FinalizedAllocInfo { + sys::MemoryBlock StandardSegments; + std::vector DeallocActions; + }; + + FinalizedAlloc + createFinalizedAlloc(sys::MemoryBlock StandardSegments, + std::vector DeallocActions); + + uint64_t PageSize; + std::mutex FinalizedAllocsMutex; + RecyclingAllocator FinalizedAllocInfos; }; } // end namespace jitlink diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/MachO_arm64.h b/llvm/include/llvm/ExecutionEngine/JITLink/MachO_arm64.h index ecbc93e1467d..aee14c0d1fe5 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/MachO_arm64.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/MachO_arm64.h @@ -29,6 +29,8 @@ enum MachOARM64RelocationKind : Edge::Kind { PageOffset12, GOTPage21, GOTPageOffset12, + TLVPage21, + TLVPageOffset12, PointerToGOT, PairedAddend, LDRLiteral19, diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/MemoryFlags.h b/llvm/include/llvm/ExecutionEngine/JITLink/MemoryFlags.h new file mode 100644 index 000000000000..8fdce93ebc56 --- /dev/null +++ b/llvm/include/llvm/ExecutionEngine/JITLink/MemoryFlags.h @@ -0,0 +1,225 @@ +//===-------- MemoryFlags.h - Memory allocation flags -----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Defines types and operations related to memory protection and allocation +// lifetimes. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_JITLINK_MEMORYFLAGS_H +#define LLVM_EXECUTIONENGINE_JITLINK_MEMORYFLAGS_H + +#include "llvm/ADT/BitmaskEnum.h" +#include "llvm/ADT/DenseMapInfo.h" +#include "llvm/Support/Memory.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { +namespace jitlink { + +/// Describes Read/Write/Exec permissions for memory. +enum class MemProt { + None = 0, + Read = 1U << 0, + Write = 1U << 1, + Exec = 1U << 2, + LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue = */ Exec) +}; + +/// Print a MemProt as an RWX triple. +raw_ostream &operator<<(raw_ostream &OS, MemProt MP); + +/// Convert a MemProt value to a corresponding sys::Memory::ProtectionFlags +/// value. +inline sys::Memory::ProtectionFlags toSysMemoryProtectionFlags(MemProt MP) { + std::underlying_type_t PF = 0; + if ((MP & MemProt::Read) != MemProt::None) + PF |= sys::Memory::MF_READ; + if ((MP & MemProt::Write) != MemProt::None) + PF |= sys::Memory::MF_WRITE; + if ((MP & MemProt::Exec) != MemProt::None) + PF |= sys::Memory::MF_EXEC; + return static_cast(PF); +} + +/// Convert a sys::Memory::ProtectionFlags value to a corresponding MemProt +/// value. +inline MemProt fromSysMemoryProtectionFlags(sys::Memory::ProtectionFlags PF) { + MemProt MP = MemProt::None; + if (PF & sys::Memory::MF_READ) + MP |= MemProt::Read; + if (PF & sys::Memory::MF_WRITE) + MP |= MemProt::Write; + if (PF & sys::Memory::MF_EXEC) + MP |= MemProt::None; + return MP; +} + +/// Describes a memory deallocation policy for memory to be allocated by a +/// JITLinkMemoryManager. +/// +/// All memory allocated by a call to JITLinkMemoryManager::allocate should be +/// deallocated if a call is made to +/// JITLinkMemoryManager::InFlightAllocation::abandon. The policies below apply +/// to finalized allocations. 
+enum class MemDeallocPolicy { + /// Standard memory should be deallocated when the deallocate method is called + /// for the finalized allocation. + Standard, + + /// Finalize memory should be overwritten and then deallocated after all + /// finalization functions have been run. + Finalize +}; + +/// Print a MemDeallocPolicy. +raw_ostream &operator<<(raw_ostream &OS, MemDeallocPolicy MDP); + +/// A pair of memory protections and allocation policies. +/// +/// Optimized for use as a small map key. +class AllocGroup { + friend struct llvm::DenseMapInfo; + + using underlying_type = uint8_t; + static constexpr unsigned BitsForProt = 3; + static constexpr unsigned BitsForDeallocPolicy = 1; + static constexpr unsigned MaxIdentifiers = + 1U << (BitsForProt + BitsForDeallocPolicy); + +public: + static constexpr unsigned NumGroups = MaxIdentifiers; + + /// Create a default AllocGroup. No memory protections, standard + /// deallocation policy. + AllocGroup() = default; + + /// Create an AllocGroup from a MemProt only -- uses + /// MemoryDeallocationPolicy::Standard. + AllocGroup(MemProt MP) : Id(static_cast(MP)) {} + + /// Create an AllocGroup from a MemProt and a MemoryDeallocationPolicy. + AllocGroup(MemProt MP, MemDeallocPolicy MDP) + : Id(static_cast(MP) | + (static_cast(MDP) << BitsForProt)) {} + + /// Returns the MemProt for this group. + MemProt getMemProt() const { + return static_cast(Id & ((1U << BitsForProt) - 1)); + } + + /// Returns the MemoryDeallocationPolicy for this group. + MemDeallocPolicy getMemDeallocPolicy() const { + return static_cast(Id >> BitsForProt); + } + + friend bool operator==(const AllocGroup &LHS, const AllocGroup &RHS) { + return LHS.Id == RHS.Id; + } + + friend bool operator!=(const AllocGroup &LHS, const AllocGroup &RHS) { + return !(LHS == RHS); + } + + friend bool operator<(const AllocGroup &LHS, const AllocGroup &RHS) { + return LHS.Id < RHS.Id; + } + +private: + AllocGroup(underlying_type RawId) : Id(RawId) {} + underlying_type Id = 0; +}; + +/// A specialized small-map for AllocGroups. +/// +/// Iteration order is guaranteed to match key ordering. +template class AllocGroupSmallMap { +private: + using ElemT = std::pair; + using VectorTy = SmallVector; + + static bool compareKey(const ElemT &E, const AllocGroup &G) { + return E.first < G; + } + +public: + using iterator = typename VectorTy::iterator; + + AllocGroupSmallMap() = default; + AllocGroupSmallMap(std::initializer_list> Inits) { + Elems.reserve(Inits.size()); + for (const auto &E : Inits) + Elems.push_back(E); + llvm::sort(Elems, [](const ElemT &LHS, const ElemT &RHS) { + return LHS.first < RHS.first; + }); + } + + iterator begin() { return Elems.begin(); } + iterator end() { return Elems.end(); } + iterator find(AllocGroup G) { + auto I = lower_bound(Elems, G, compareKey); + return (I->first == G) ? I : end(); + } + + bool empty() const { return Elems.empty(); } + size_t size() const { return Elems.size(); } + + T &operator[](AllocGroup G) { + auto I = lower_bound(Elems, G, compareKey); + if (I == Elems.end() || I->first != G) + I = Elems.insert(I, std::make_pair(G, T())); + return I->second; + } + +private: + VectorTy Elems; +}; + +/// Print an AllocGroup. 
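To make the bit-packing and map semantics above concrete, here is a small sketch (not part of the patch) that composes MemProt values, round-trips one through the sys::Memory conversions, and tallies segment sizes per AllocGroup:

#include "llvm/ExecutionEngine/JITLink/MemoryFlags.h"
#include "llvm/Support/Debug.h"
#include <cassert>
#include <cstdint>

using namespace llvm;
using namespace llvm::jitlink;

void allocGroupExamples() {
  // MemProt is a bitmask enum; protections compose with | and are tested
  // against MemProt::None.
  MemProt RX = MemProt::Read | MemProt::Exec;
  assert((RX & MemProt::Exec) != MemProt::None);

  // Round-trip through the sys::Memory representation.
  assert(fromSysMemoryProtectionFlags(toSysMemoryProtectionFlags(RX)) == RX);

  // AllocGroup packs the MemProt (3 bits) and MemDeallocPolicy (1 bit) into a
  // single byte-sized key, so there are at most 16 distinct groups.
  AllocGroup RWData(MemProt::Read | MemProt::Write);
  AllocGroup RXInit(RX, MemDeallocPolicy::Finalize);
  assert(RWData.getMemDeallocPolicy() == MemDeallocPolicy::Standard);

  // AllocGroupSmallMap keeps entries sorted by group; operator[] inserts a
  // default-constructed value on first use.
  AllocGroupSmallMap<uint64_t> SegSizes;
  SegSizes[RWData] += 128; // mutable data
  SegSizes[RXInit] += 64;  // init code, deallocated after finalization
  for (auto &KV : SegSizes)
    dbgs() << KV.first << ": " << KV.second << " bytes\n";
}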
+raw_ostream &operator<<(raw_ostream &OS, AllocGroup AG); + +} // end namespace jitlink + +template <> struct DenseMapInfo { + static inline jitlink::MemProt getEmptyKey() { + return jitlink::MemProt(~uint8_t(0)); + } + static inline jitlink::MemProt getTombstoneKey() { + return jitlink::MemProt(~uint8_t(0) - 1); + } + static unsigned getHashValue(const jitlink::MemProt &Val) { + using UT = std::underlying_type_t; + return DenseMapInfo::getHashValue(static_cast(Val)); + } + static bool isEqual(const jitlink::MemProt &LHS, + const jitlink::MemProt &RHS) { + return LHS == RHS; + } +}; + +template <> struct DenseMapInfo { + static inline jitlink::AllocGroup getEmptyKey() { + return jitlink::AllocGroup(~uint8_t(0)); + } + static inline jitlink::AllocGroup getTombstoneKey() { + return jitlink::AllocGroup(~uint8_t(0) - 1); + } + static unsigned getHashValue(const jitlink::AllocGroup &Val) { + return DenseMapInfo::getHashValue( + Val.Id); + } + static bool isEqual(const jitlink::AllocGroup &LHS, + const jitlink::AllocGroup &RHS) { + return LHS == RHS; + } +}; + +} // end namespace llvm + +#endif // LLVM_EXECUTIONENGINE_JITLINK_MEMORYFLAGS_H diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/TableManager.h b/llvm/include/llvm/ExecutionEngine/JITLink/TableManager.h new file mode 100644 index 000000000000..c20f62d515ec --- /dev/null +++ b/llvm/include/llvm/ExecutionEngine/JITLink/TableManager.h @@ -0,0 +1,63 @@ +//===---------------------- TableManager.h ----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Fix edge for edge that needs an entry to reference the target symbol +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_JITLINK_TABLEMANAGER_H +#define LLVM_EXECUTIONENGINE_JITLINK_TABLEMANAGER_H + +#include "llvm/ExecutionEngine/JITLink/JITLink.h" +#include "llvm/Support/Debug.h" + +namespace llvm { +namespace jitlink { + +/// A CRTP base for tables that are built on demand, e.g. Global Offset Tables +/// and Procedure Linkage Tables. +/// The getEntyrForTarget function returns the table entry corresponding to the +/// given target, calling down to the implementation class to build an entry if +/// one does not already exist. +template class TableManager { +public: + /// Return the constructed entry + /// + /// Use parameter G to construct the entry for target symbol + Symbol &getEntryForTarget(LinkGraph &G, Symbol &Target) { + assert(Target.hasName() && "Edge cannot point to anonymous target"); + + auto EntryI = Entries.find(Target.getName()); + + // Build the entry if it doesn't exist. 
+ if (EntryI == Entries.end()) { + auto &Entry = impl().createEntry(G, Target); + DEBUG_WITH_TYPE("jitlink", { + dbgs() << " Created" << impl().getSectionName() << "entry for " + << Target.getName() << ": " << Entry << "\n"; + }); + EntryI = Entries.insert(std::make_pair(Target.getName(), &Entry)).first; + } + + assert(EntryI != Entries.end() && "Could not get entry symbol"); + DEBUG_WITH_TYPE("jitlink", { + dbgs() << " Using " << impl().getSectionName() << " entry " + << *EntryI->second << "\n"; + }); + return *EntryI->second; + } + +private: + TableManagerImplT &impl() { return static_cast(*this); } + DenseMap Entries; +}; + +} // namespace jitlink +} // namespace llvm + +#endif diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/aarch64.h b/llvm/include/llvm/ExecutionEngine/JITLink/aarch64.h new file mode 100644 index 000000000000..994ce783b058 --- /dev/null +++ b/llvm/include/llvm/ExecutionEngine/JITLink/aarch64.h @@ -0,0 +1,38 @@ +//=== aarch64.h - Generic JITLink aarch64 edge kinds, utilities -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Generic utilities for graphs representing aarch64 objects. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_JITLINK_AARCH64_H +#define LLVM_EXECUTIONENGINE_JITLINK_AARCH64_H + +#include "llvm/ExecutionEngine/JITLink/JITLink.h" + +namespace llvm { +namespace jitlink { +namespace aarch64 { + +/// Represets aarch64 fixups +enum EdgeKind_aarch64 : Edge::Kind { + + /// Set a CALL immediate field to bits [27:2] of X = Target - Fixup + Addend + R_AARCH64_CALL26 = Edge::FirstRelocation, + +}; + +/// Returns a string name for the given aarch64 edge. 
For debugging purposes +/// only +const char *getEdgeKindName(Edge::Kind K); + +} // namespace aarch64 +} // namespace jitlink +} // namespace llvm + +#endif // LLVM_EXECUTIONENGINE_JITLINK_AARCH64_H diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/riscv.h b/llvm/include/llvm/ExecutionEngine/JITLink/riscv.h index a4509f3888a4..b8d08d88c1c9 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/riscv.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/riscv.h @@ -70,7 +70,19 @@ enum EdgeKind_riscv : Edge::Kind { /// /// Fixup expression: /// Fixup <- (Target - Fixup + Addend) - R_RISCV_CALL + R_RISCV_CALL, + + /// PC relative GOT offset + /// + /// Fixup expression: + /// Fixup <- (GOT - Fixup + Addend) >> 12 + R_RISCV_GOT_HI20, + + /// PC relative call by PLT + /// + /// Fixup expression: + /// Fixup <- (Target - Fixup + Addend) + R_RISCV_CALL_PLT }; diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/x86_64.h b/llvm/include/llvm/ExecutionEngine/JITLink/x86_64.h index 006d983537e9..3130ea381534 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/x86_64.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/x86_64.h @@ -14,6 +14,7 @@ #define LLVM_EXECUTIONENGINE_JITLINK_X86_64_H #include "llvm/ExecutionEngine/JITLink/JITLink.h" +#include "llvm/ExecutionEngine/JITLink/TableManager.h" #include @@ -42,6 +43,16 @@ enum EdgeKind_x86_64 : Edge::Kind { /// Pointer32, + /// A signed 32-bit pointer value relocation + /// + /// Fixup expression: + /// Fixup <- Target + Addend : int32 + /// + /// Errors: + /// - The target must reside in the signed 32-bits([-2**31, 2**32 - 1]) of + /// the address space, otherwise an out-of-range error will be returned. + Pointer32Signed, + /// A 64-bit delta. /// /// Delta from the fixup to the target. @@ -85,6 +96,18 @@ enum EdgeKind_x86_64 : Edge::Kind { /// an out-of-range error will be returned. NegDelta32, + /// A 64-bit GOT delta. + /// + /// Delta from the global offset table to the target + /// + /// Fixup expression: + /// Fixup <- Target - GOTSymbol + Addend : int64 + /// + /// Errors: + /// - *ASSERTION* Failure to a null pointer GOTSymbol, which the GOT section + /// symbol was not been defined. + Delta64FromGOT, + /// A 32-bit PC-relative branch. /// /// Represents a PC-relative call or branch to a target. This can be used to @@ -120,7 +143,7 @@ enum EdgeKind_x86_64 : Edge::Kind { /// This edge kind has the same fixup expression as BranchPCRel32, but further /// identifies the call/branch as being to a pointer jump stub. For edges of /// this kind the jump stub should not be bypassed (use - /// BranchPCRel32ToPtrJumpStubRelaxable for that), but the pointer location + /// BranchPCRel32ToPtrJumpStubBypassable for that), but the pointer location /// target may be recorded to allow manipulation at runtime. /// /// Fixup expression: @@ -136,7 +159,8 @@ enum EdgeKind_x86_64 : Edge::Kind { /// /// The edge kind has the same fixup expression as BranchPCRel32ToPtrJumpStub, /// but identifies the call/branch as being to a pointer jump stub that may be - /// bypassed if the ultimate target is within range of the fixup location. + /// bypassed with a direct jump to the ultimate target if the ultimate target + /// is within range of the fixup location. /// /// Fixup expression: /// Fixup <- Target - Fixup + Addend - 4: int32 @@ -145,7 +169,7 @@ enum EdgeKind_x86_64 : Edge::Kind { /// - The result of the fixup expression must fit into an int32, otherwise /// an out-of-range error will be returned. 
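The "Fixup expression" lines above are plain pointer arithmetic. The following standalone sketch (with invented addresses, not taken from the patch) works through the BranchPCRel32-style case, including the int32 range check that produces the out-of-range errors mentioned in the Errors notes:

#include <cassert>
#include <cstdint>

// Same check as x86_64::isInRangeForImmS32 later in this header.
static bool fitsInInt32(int64_t Value) {
  return Value >= INT32_MIN && Value <= INT32_MAX;
}

int main() {
  // Hypothetical layout: a call instruction whose 4-byte immediate field (the
  // fixup) starts at 0x1001, targeting a symbol at 0x2000.
  uint64_t FixupAddress = 0x1001;
  uint64_t TargetAddress = 0x2000;
  int64_t Addend = 0;

  // BranchPCRel32: Fixup <- Target - Fixup + Addend - 4
  // (the -4 accounts for the displacement being relative to the end of the
  // 4-byte immediate, i.e. the next instruction).
  int64_t Value = (int64_t)(TargetAddress - FixupAddress) + Addend - 4;
  assert(fitsInInt32(Value) && "JITLink would report an out-of-range error");

  // The resulting value (0xFFB here) is written as a little-endian int32 at
  // the fixup address.
  int32_t Imm = (int32_t)Value;
  (void)Imm;
  return 0;
}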
/// - BranchPCRel32ToPtrJumpStubRelaxable, + BranchPCRel32ToPtrJumpStubBypassable, /// A GOT entry getter/constructor, transformed to Delta32 pointing at the GOT /// entry for the original target. @@ -167,7 +191,62 @@ enum EdgeKind_x86_64 : Edge::Kind { /// RequestGOTAndTransformToDelta32, - /// A PC-relative reference to a GOT entry, relaxable if GOT entry target + /// A GOT entry getter/constructor, transformed to Delta64 pointing at the GOT + /// entry for the original target. + /// + /// Indicates that this edge should be transformed into a Delta64 targeting + /// the GOT entry for the edge's current target, maintaining the same addend. + /// A GOT entry for the target should be created if one does not already + /// exist. + /// + /// Edges of this kind are usually handled by a GOT builder pass inserted by + /// default. + /// + /// Fixup expression: + /// NONE + /// + /// Errors: + /// - *ASSERTION* Failure to handle edges of this kind prior to the fixup + /// phase will result in an assert/unreachable during the fixup phase. + /// + RequestGOTAndTransformToDelta64, + + /// A GOT entry offset within GOT getter/constructor, transformed to + /// Delta64FromGOT + /// pointing at the GOT entry for the original target + /// + /// Indicates that this edge should be transformed into a Delta64FromGOT + /// targeting + /// the GOT entry for the edge's current target, maintaining the same addend. + /// A GOT entry for the target should be created if one does not already + /// exist. + /// + /// Edges of this kind are usually handled by a GOT builder pass inserted by + /// default + /// + /// Fixup expression: + /// NONE + /// + /// Errors: + /// - *ASSERTION* Failure to handle edges of this kind prior to the fixup + /// phase will result in an assert/unreachable during the fixup phase + RequestGOTAndTransformToDelta64FromGOT, + + /// A PC-relative load of a GOT entry, relaxable if GOT entry target is + /// in-range of the fixup + /// + /// TODO: Explain the optimization + /// + /// Fixup expression + /// Fixup <- Target - (Fixup + 4) + Addend : int32 + /// + /// Errors: + /// - The result of the fixup expression must fit into an int32, otherwise + /// an out-of-range error will be returned. + // + PCRel32GOTLoadRelaxable, + + /// A PC-relative REX load of a GOT entry, relaxable if GOT entry target /// is in-range of the fixup. /// /// If the GOT entry target is in-range of the fixup then the load from the @@ -180,17 +259,39 @@ enum EdgeKind_x86_64 : Edge::Kind { /// - The result of the fixup expression must fit into an int32, otherwise /// an out-of-range error will be returned. /// - PCRel32GOTLoadRelaxable, + PCRel32GOTLoadREXRelaxable, - /// A GOT entry getter/constructor, transformed to PCRel32ToGOTLoadRelaxable - /// pointing at the GOT entry for the original target. + /// A GOT entry getter/constructor, transformed to + /// PCRel32ToGOTLoadREXRelaxable pointing at the GOT entry for the original + /// target. /// - /// Indicates that this edge should be transformed into a - /// PC32ToGOTLoadRelaxable targeting the GOT entry for the edge's current - /// target, maintaining the same addend. A GOT entry for the target should be - /// created if one does not already exist. + /// Indicates that this edge should be lowered to a PC32ToGOTLoadREXRelaxable + /// targeting the GOT entry for the edge's current target, maintaining the + /// same addend. A GOT entry for the target should be created if one does not + /// already exist. 
/// - /// Edges of this kind are usually handled by a GOT builder pass inserted by + /// Edges of this kind are usually lowered by a GOT builder pass inserted by + /// default. + /// + /// Fixup expression: + /// NONE + /// + /// Errors: + /// - *ASSERTION* Failure to handle edges of this kind prior to the fixup + /// phase will result in an assert/unreachable during the fixup phase. + /// + RequestGOTAndTransformToPCRel32GOTLoadREXRelaxable, + + /// A GOT entry getter/constructor, transformed to + /// PCRel32ToGOTLoadRelaxable pointing at the GOT entry for the original + /// target. + /// + /// Indicates that this edge should be lowered to a PC32ToGOTLoadRelaxable + /// targeting the GOT entry for the edge's current target, maintaining the + /// same addend. A GOT entry for the target should be created if one does not + /// already exist. + /// + /// Edges of this kind are usually lowered by a GOT builder pass inserted by /// default. /// /// Fixup expression: @@ -202,10 +303,10 @@ enum EdgeKind_x86_64 : Edge::Kind { /// RequestGOTAndTransformToPCRel32GOTLoadRelaxable, - /// A PC-relative reference to a Thread Local Variable Pointer (TLVP) entry, + /// A PC-relative REX load of a Thread Local Variable Pointer (TLVP) entry, /// relaxable if the TLVP entry target is in-range of the fixup. /// - /// If the TLVP entry target is in-range of the fixup then the load frmo the + /// If the TLVP entry target is in-range of the fixup then the load from the /// TLVP may be replaced with a direct memory address calculation. /// /// The target of this edge must be a thread local variable entry of the form @@ -222,15 +323,18 @@ enum EdgeKind_x86_64 : Edge::Kind { /// - The target must be either external, or a TLV entry of the required /// form, otherwise a malformed TLV entry error will be returned. /// - PCRel32TLVPLoadRelaxable, + PCRel32TLVPLoadREXRelaxable, + + /// TODO: Explain the generic edge kind + RequestTLSDescInGOTAndTransformToDelta32, /// A TLVP entry getter/constructor, transformed to - /// Delta32ToTLVPLoadRelaxable. + /// Delta32ToTLVPLoadREXRelaxable. /// /// Indicates that this edge should be transformed into a - /// Delta32ToTLVPLoadRelaxable targeting the TLVP entry for the edge's current - /// target. A TLVP entry for the target should be created if one does not - /// already exist. + /// Delta32ToTLVPLoadREXRelaxable targeting the TLVP entry for the edge's + /// current target. A TLVP entry for the target should be created if one does + /// not already exist. /// /// Fixup expression: /// NONE @@ -239,7 +343,7 @@ enum EdgeKind_x86_64 : Edge::Kind { /// - *ASSERTION* Failure to handle edges of this kind prior to the fixup /// phase will result in an assert/unreachable during the fixup phase. /// - RequestTLVPAndTransformToPCRel32TLVPLoadRelaxable + RequestTLVPAndTransformToPCRel32TLVPLoadREXRelaxable }; /// Returns a string name for the given x86-64 edge. For debugging purposes @@ -258,7 +362,8 @@ inline bool isInRangeForImmS32(int64_t Value) { } /// Apply fixup expression for edge to block content. 
-inline Error applyFixup(LinkGraph &G, Block &B, const Edge &E) { +inline Error applyFixup(LinkGraph &G, Block &B, const Edge &E, + const Symbol *GOTSymbol) { using namespace support; char *BlockWorkingMem = B.getAlreadyMutableContent().data(); @@ -281,12 +386,21 @@ inline Error applyFixup(LinkGraph &G, Block &B, const Edge &E) { return makeTargetOutOfRangeError(G, B, E); break; } + case Pointer32Signed: { + int64_t Value = E.getTarget().getAddress() + E.getAddend(); + if (LLVM_LIKELY(isInRangeForImmS32(Value))) + *(little32_t *)FixupPtr = Value; + else + return makeTargetOutOfRangeError(G, B, E); + break; + } case BranchPCRel32: case BranchPCRel32ToPtrJumpStub: - case BranchPCRel32ToPtrJumpStubRelaxable: + case BranchPCRel32ToPtrJumpStubBypassable: case PCRel32GOTLoadRelaxable: - case PCRel32TLVPLoadRelaxable: { + case PCRel32GOTLoadREXRelaxable: + case PCRel32TLVPLoadREXRelaxable: { int64_t Value = E.getTarget().getAddress() - (FixupAddress + 4) + E.getAddend(); if (LLVM_LIKELY(isInRangeForImmS32(Value))) @@ -325,6 +439,13 @@ inline Error applyFixup(LinkGraph &G, Block &B, const Edge &E) { return makeTargetOutOfRangeError(G, B, E); break; } + case Delta64FromGOT: { + assert(GOTSymbol && "No GOT section symbol"); + int64_t Value = + E.getTarget().getAddress() - GOTSymbol->getAddress() + E.getAddend(); + *(little64_t *)FixupPtr = Value; + break; + } default: { // If you hit this you should check that *constructor and other non-fixup @@ -395,6 +516,114 @@ inline Symbol &createAnonymousPointerJumpStub(LinkGraph &G, false); } +/// Global Offset Table Builder. +class GOTTableManager : public TableManager { +public: + static StringRef getSectionName() { return "$__GOT"; } + + bool visitEdge(LinkGraph &G, Block *B, Edge &E) { + Edge::Kind KindToSet = Edge::Invalid; + switch (E.getKind()) { + case x86_64::Delta64FromGOT: { + // we need to make sure that the GOT section exists, but don't otherwise + // need to fix up this edge + getGOTSection(G); + return false; + } + case x86_64::RequestGOTAndTransformToPCRel32GOTLoadREXRelaxable: + KindToSet = x86_64::PCRel32GOTLoadREXRelaxable; + break; + case x86_64::RequestGOTAndTransformToPCRel32GOTLoadRelaxable: + KindToSet = x86_64::PCRel32GOTLoadRelaxable; + break; + case x86_64::RequestGOTAndTransformToDelta64: + KindToSet = x86_64::Delta64; + break; + case x86_64::RequestGOTAndTransformToDelta64FromGOT: + KindToSet = x86_64::Delta64FromGOT; + break; + case x86_64::RequestGOTAndTransformToDelta32: + KindToSet = x86_64::Delta32; + break; + default: + return false; + } + assert(KindToSet != Edge::Invalid && + "Fell through switch, but no new kind to set"); + DEBUG_WITH_TYPE("jitlink", { + dbgs() << " Fixing " << G.getEdgeKindName(E.getKind()) << " edge at " + << formatv("{0:x}", B->getFixupAddress(E)) << " (" + << formatv("{0:x}", B->getAddress()) << " + " + << formatv("{0:x}", E.getOffset()) << ")\n"; + }); + E.setKind(KindToSet); + E.setTarget(getEntryForTarget(G, E.getTarget())); + return true; + } + + Symbol &createEntry(LinkGraph &G, Symbol &Target) { + return createAnonymousPointer(G, getGOTSection(G), &Target); + } + +private: + Section &getGOTSection(LinkGraph &G) { + if (!GOTSection) + GOTSection = &G.createSection(getSectionName(), MemProt::Read); + return *GOTSection; + } + + Section *GOTSection = nullptr; +}; + +/// Procedure Linkage Table Builder. 
+class PLTTableManager : public TableManager { +public: + PLTTableManager(GOTTableManager &GOT) : GOT(GOT) {} + + static StringRef getSectionName() { return "$__STUBS"; } + + bool visitEdge(LinkGraph &G, Block *B, Edge &E) { + if (E.getKind() == x86_64::BranchPCRel32 && !E.getTarget().isDefined()) { + DEBUG_WITH_TYPE("jitlink", { + dbgs() << " Fixing " << G.getEdgeKindName(E.getKind()) << " edge at " + << formatv("{0:x}", B->getFixupAddress(E)) << " (" + << formatv("{0:x}", B->getAddress()) << " + " + << formatv("{0:x}", E.getOffset()) << ")\n"; + }); + // Set the edge kind to Branch32ToPtrJumpStubBypassable to enable it to + // be optimized when the target is in-range. + E.setKind(x86_64::BranchPCRel32ToPtrJumpStubBypassable); + E.setTarget(getEntryForTarget(G, E.getTarget())); + return true; + } + return false; + } + + Symbol &createEntry(LinkGraph &G, Symbol &Target) { + return createAnonymousPointerJumpStub(G, getStubsSection(G), + GOT.getEntryForTarget(G, Target)); + } + +public: + Section &getStubsSection(LinkGraph &G) { + if (!PLTSection) + PLTSection = + &G.createSection(getSectionName(), MemProt::Read | MemProt::Exec); + return *PLTSection; + } + + GOTTableManager &GOT; + Section *PLTSection = nullptr; +}; + +/// Optimize the GOT and Stub relocations if the edge target address is in range +/// 1. PCRel32GOTLoadRelaxable. For this edge kind, if the target is in range, +/// then replace GOT load with lea +/// 2. BranchPCRel32ToPtrJumpStubRelaxable. For this edge kind, if the target is +/// in range, replace a indirect jump by plt stub with a direct jump to the +/// target +Error optimizeGOTAndStubAccesses(LinkGraph &G); + } // namespace x86_64 } // end namespace jitlink } // end namespace llvm diff --git a/llvm/include/llvm/ExecutionEngine/MCJIT.h b/llvm/include/llvm/ExecutionEngine/MCJIT.h index 8253bf98963b..adce98f380c5 100644 --- a/llvm/include/llvm/ExecutionEngine/MCJIT.h +++ b/llvm/include/llvm/ExecutionEngine/MCJIT.h @@ -26,6 +26,9 @@ namespace { // delete it all as dead code, even with whole program optimization, // yet is effectively a NO-OP. As the compiler isn't smart enough // to know that getenv() never returns -1, this will do the job. + // This is so that globals in the translation units where these functions + // are defined are forced to be initialized, populating various + // registries. 
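Returning to the GOTTableManager and PLTTableManager defined in x86_64.h above: they are typically driven from a JITLink pass that visits every edge once and lets each manager claim the edge kinds it understands. The sketch below is an assumption based on the visitEdge interface shown, not a verbatim quote of the patch; in the real backends a pass like this is paired with the optimizeGOTAndStubAccesses pass declared above, which later relaxes GOT loads and stub calls whose targets turn out to be in range.

#include "llvm/ExecutionEngine/JITLink/x86_64.h"

using namespace llvm;
using namespace llvm::jitlink;

// Rewrites GOT/PLT request edges into concrete edges targeting synthesized
// GOT entries and jump stubs.
static Error buildGOTAndStubs_x86_64(LinkGraph &G) {
  x86_64::GOTTableManager GOT;
  x86_64::PLTTableManager PLT(GOT);

  // visitEdge returns true once it has rewritten an edge, so each edge is
  // offered to at most one manager.
  for (auto *B : G.blocks())
    for (auto &E : B->edges()) {
      if (GOT.visitEdge(G, B, E))
        continue;
      PLT.visitEdge(G, B, E);
    }
  return Error::success();
}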
if (std::getenv("bar") != (char*) -1) return; diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Core.h b/llvm/include/llvm/ExecutionEngine/Orc/Core.h index e832d8d57dfa..5cac65b49a05 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/Core.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/Core.h @@ -21,7 +21,7 @@ #include "llvm/ExecutionEngine/JITSymbol.h" #include "llvm/ExecutionEngine/Orc/ExecutorProcessControl.h" #include "llvm/ExecutionEngine/Orc/Shared/WrapperFunctionUtils.h" -#include "llvm/ExecutionEngine/OrcV1Deprecation.h" +#include "llvm/ExecutionEngine/Orc/TaskDispatch.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ExtensibleRTTI.h" @@ -434,13 +434,16 @@ class SymbolsNotFound : public ErrorInfo { public: static char ID; - SymbolsNotFound(SymbolNameSet Symbols); - SymbolsNotFound(SymbolNameVector Symbols); + SymbolsNotFound(std::shared_ptr SSP, SymbolNameSet Symbols); + SymbolsNotFound(std::shared_ptr SSP, + SymbolNameVector Symbols); std::error_code convertToErrorCode() const override; void log(raw_ostream &OS) const override; + std::shared_ptr getSymbolStringPool() { return SSP; } const SymbolNameVector &getSymbols() const { return Symbols; } private: + std::shared_ptr SSP; SymbolNameVector Symbols; }; @@ -449,12 +452,15 @@ class SymbolsCouldNotBeRemoved : public ErrorInfo { public: static char ID; - SymbolsCouldNotBeRemoved(SymbolNameSet Symbols); + SymbolsCouldNotBeRemoved(std::shared_ptr SSP, + SymbolNameSet Symbols); std::error_code convertToErrorCode() const override; void log(raw_ostream &OS) const override; + std::shared_ptr getSymbolStringPool() { return SSP; } const SymbolNameSet &getSymbols() const { return Symbols; } private: + std::shared_ptr SSP; SymbolNameSet Symbols; }; @@ -466,13 +472,17 @@ class MissingSymbolDefinitions : public ErrorInfo { public: static char ID; - MissingSymbolDefinitions(std::string ModuleName, SymbolNameVector Symbols) - : ModuleName(std::move(ModuleName)), Symbols(std::move(Symbols)) {} + MissingSymbolDefinitions(std::shared_ptr SSP, + std::string ModuleName, SymbolNameVector Symbols) + : SSP(std::move(SSP)), ModuleName(std::move(ModuleName)), + Symbols(std::move(Symbols)) {} std::error_code convertToErrorCode() const override; void log(raw_ostream &OS) const override; + std::shared_ptr getSymbolStringPool() { return SSP; } const std::string &getModuleName() const { return ModuleName; } const SymbolNameVector &getSymbols() const { return Symbols; } private: + std::shared_ptr SSP; std::string ModuleName; SymbolNameVector Symbols; }; @@ -485,13 +495,17 @@ class UnexpectedSymbolDefinitions : public ErrorInfo SSP, + std::string ModuleName, SymbolNameVector Symbols) + : SSP(std::move(SSP)), ModuleName(std::move(ModuleName)), + Symbols(std::move(Symbols)) {} std::error_code convertToErrorCode() const override; void log(raw_ostream &OS) const override; + std::shared_ptr getSymbolStringPool() { return SSP; } const std::string &getModuleName() const { return ModuleName; } const SymbolNameVector &getSymbols() const { return Symbols; } private: + std::shared_ptr SSP; std::string ModuleName; SymbolNameVector Symbols; }; @@ -1241,21 +1255,6 @@ public: const DenseMap &InitSyms); }; -/// Represents an abstract task for ORC to run. -class Task : public RTTIExtends { -public: - static char ID; - - /// Description of the task to be performed. Used for logging. - virtual void printDescription(raw_ostream &OS) = 0; - - /// Run the task. - virtual void run() = 0; - -private: - void anchor() override; -}; - /// A materialization task. 
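The ORC error classes above now retain a shared_ptr to the SymbolStringPool that owns the names they report, so the SymbolStringPtrs remain valid for the lifetime of the error object. A sketch of consuming one of these errors with the usual llvm::handleErrors idiom (the logging behaviour is illustrative only):

#include "llvm/ExecutionEngine/Orc/Core.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;
using namespace llvm::orc;

// Log the symbols named by a SymbolsNotFound error and swallow it; any other
// error kind is returned to the caller unchanged.
Error reportMissingSymbols(Error Err) {
  return handleErrors(
      std::move(Err), [](std::unique_ptr<SymbolsNotFound> SNF) -> Error {
        // SNF keeps its SymbolStringPool alive, so the SymbolStringPtrs in
        // getSymbols() are safe to dereference here.
        for (const SymbolStringPtr &Sym : SNF->getSymbols())
          errs() << "missing symbol: " << *Sym << "\n";
        return Error::success();
      });
}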
class MaterializationTask : public RTTIExtends { public: @@ -1285,13 +1284,16 @@ public: /// For reporting errors. using ErrorReporter = std::function; + /// Send a result to the remote. + using SendResultFunction = unique_function; + /// For dispatching ORC tasks (typically materialization tasks). using DispatchTaskFunction = unique_function T)>; /// An asynchronous wrapper-function callable from the executor via /// jit-dispatch. using JITDispatchHandlerFunction = unique_function; /// A map associating tag names with asynchronous wrapper function @@ -1303,13 +1305,19 @@ public: /// object. ExecutionSession(std::unique_ptr EPC); - /// End the session. Closes all JITDylibs. + /// End the session. Closes all JITDylibs and disconnects from the + /// executor. Error endSession(); /// Get the ExecutorProcessControl object associated with this /// ExecutionSession. ExecutorProcessControl &getExecutorProcessControl() { return *EPC; } + /// Get the SymbolStringPool for this instance. + std::shared_ptr getSymbolStringPool() { + return EPC->getSymbolStringPool(); + } + /// Add a symbol name to the SymbolStringPool and return a pointer to it. SymbolStringPtr intern(StringRef SymName) { return EPC->intern(SymName); } @@ -1462,10 +1470,9 @@ public: /// \endcode{.cpp} /// /// The given OnComplete function will be called to return the result. - void callWrapperAsync(ExecutorProcessControl::SendResultFunction OnComplete, - JITTargetAddress WrapperFnAddr, - ArrayRef ArgBuffer) { - EPC->callWrapperAsync(std::move(OnComplete), WrapperFnAddr, ArgBuffer); + template + void callWrapperAsync(ArgTs &&... Args) { + EPC->callWrapperAsync(std::forward(Args)...); } /// Run a wrapper function in the executor. The wrapper function should be @@ -1474,30 +1481,18 @@ public: /// \code{.cpp} /// CWrapperFunctionResult fn(uint8_t *Data, uint64_t Size); /// \endcode{.cpp} - shared::WrapperFunctionResult callWrapper(JITTargetAddress WrapperFnAddr, + shared::WrapperFunctionResult callWrapper(ExecutorAddr WrapperFnAddr, ArrayRef ArgBuffer) { - std::promise RP; - auto RF = RP.get_future(); - callWrapperAsync( - [&](shared::WrapperFunctionResult R) { RP.set_value(std::move(R)); }, - WrapperFnAddr, ArgBuffer); - return RF.get(); + return EPC->callWrapper(WrapperFnAddr, ArgBuffer); } /// Run a wrapper function using SPS to serialize the arguments and /// deserialize the results. template - void callSPSWrapperAsync(SendResultT &&SendResult, - JITTargetAddress WrapperFnAddr, + void callSPSWrapperAsync(ExecutorAddr WrapperFnAddr, SendResultT &&SendResult, const ArgTs &...Args) { - shared::WrapperFunction::callAsync( - [this, - WrapperFnAddr](ExecutorProcessControl::SendResultFunction SendResult, - const char *ArgData, size_t ArgSize) { - callWrapperAsync(std::move(SendResult), WrapperFnAddr, - ArrayRef(ArgData, ArgSize)); - }, - std::move(SendResult), Args...); + EPC->callSPSWrapperAsync( + WrapperFnAddr, std::forward(SendResult), Args...); } /// Run a wrapper function using SPS to serialize the arguments and @@ -1506,13 +1501,10 @@ public: /// If SPSSignature is a non-void function signature then the second argument /// (the first in the Args list) should be a reference to a return value. 
template - Error callSPSWrapper(JITTargetAddress WrapperFnAddr, + Error callSPSWrapper(ExecutorAddr WrapperFnAddr, WrapperCallArgTs &&...WrapperCallArgs) { - return shared::WrapperFunction::call( - [this, WrapperFnAddr](const char *ArgData, size_t ArgSize) { - return callWrapper(WrapperFnAddr, ArrayRef(ArgData, ArgSize)); - }, - std::forward(WrapperCallArgs)...); + return EPC->callSPSWrapper( + WrapperFnAddr, std::forward(WrapperCallArgs)...); } /// Wrap a handler that takes concrete argument types (and a sender for a @@ -1525,7 +1517,7 @@ public: template static JITDispatchHandlerFunction wrapAsyncWithSPS(HandlerT &&H) { return [H = std::forward(H)]( - ExecutorProcessControl::SendResultFunction SendResult, + SendResultFunction SendResult, const char *ArgData, size_t ArgSize) mutable { shared::WrapperFunction::handleAsync(ArgData, ArgSize, H, std::move(SendResult)); @@ -1564,7 +1556,7 @@ public: /// This should be called by the ExecutorProcessControl instance in response /// to incoming jit-dispatch requests from the executor. void - runJITDispatchHandler(ExecutorProcessControl::SendResultFunction SendResult, + runJITDispatchHandler(SendResultFunction SendResult, JITTargetAddress HandlerFnTagAddr, ArrayRef ArgBuffer); diff --git a/llvm/include/llvm/ExecutionEngine/Orc/DebuggerSupportPlugin.h b/llvm/include/llvm/ExecutionEngine/Orc/DebuggerSupportPlugin.h new file mode 100644 index 000000000000..af092b3287d3 --- /dev/null +++ b/llvm/include/llvm/ExecutionEngine/Orc/DebuggerSupportPlugin.h @@ -0,0 +1,64 @@ +//===--- DebugerSupportPlugin.h -- Utils for debugger support ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Generates debug objects and registers them using the jit-loader-gdb protocol. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_ORC_DEBUGGERSUPPORT_H +#define LLVM_EXECUTIONENGINE_ORC_DEBUGGERSUPPORT_H + +#include "llvm/ExecutionEngine/Orc/Core.h" +#include "llvm/ExecutionEngine/Orc/EPCDebugObjectRegistrar.h" +#include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h" + +namespace llvm { +namespace orc { + +/// For each object containing debug info, installs JITLink passes to synthesize +/// a debug object and then register it via the GDB JIT-registration interface. +/// +/// Currently MachO only. For ELF use DebugObjectManagerPlugin. These two +/// plugins will be merged in the near future. 
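For the wrapper-call API above, here is a sketch of a blocking SPS call from the ExecutionSession; the wrapper's SPS signature, its address, and the example function are hypothetical, and the non-void signature means the first run-time argument receives the deserialized return value:

#include "llvm/ExecutionEngine/Orc/Core.h"
#include "llvm/ExecutionEngine/Orc/Shared/WrapperFunctionUtils.h"

using namespace llvm;
using namespace llvm::orc;

// Suppose the executor exports a wrapper with SPS signature
// "int32_t(int32_t, int32_t)" reachable at AddFnAddr (hypothetical).
Expected<int32_t> callRemoteAdd(ExecutionSession &ES, ExecutorAddr AddFnAddr,
                                int32_t X, int32_t Y) {
  int32_t Result = 0;
  if (auto Err =
          ES.callSPSWrapper<int32_t(int32_t, int32_t)>(AddFnAddr, Result, X, Y))
    return std::move(Err);
  return Result;
}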
+class GDBJITDebugInfoRegistrationPlugin : public ObjectLinkingLayer::Plugin { +public: + class DebugSectionSynthesizer { + public: + virtual ~DebugSectionSynthesizer() {} + virtual Error startSynthesis() = 0; + virtual Error completeSynthesisAndRegister() = 0; + }; + + static Expected> + Create(ExecutionSession &ES, JITDylib &ProcessJD, const Triple &TT); + + GDBJITDebugInfoRegistrationPlugin(ExecutorAddr RegisterActionAddr) + : RegisterActionAddr(RegisterActionAddr) {} + + Error notifyFailed(MaterializationResponsibility &MR) override; + Error notifyRemovingResources(ResourceKey K) override; + + void notifyTransferringResources(ResourceKey DstKey, + ResourceKey SrcKey) override; + + void modifyPassConfig(MaterializationResponsibility &MR, + jitlink::LinkGraph &LG, + jitlink::PassConfiguration &PassConfig) override; + +private: + void modifyPassConfigForMachO(MaterializationResponsibility &MR, + jitlink::LinkGraph &LG, + jitlink::PassConfiguration &PassConfig); + + ExecutorAddr RegisterActionAddr; +}; + +} // namespace orc +} // namespace llvm + +#endif // LLVM_EXECUTIONENGINE_ORC_DEBUGGERSUPPORT_H diff --git a/llvm/include/llvm/ExecutionEngine/Orc/ELFNixPlatform.h b/llvm/include/llvm/ExecutionEngine/Orc/ELFNixPlatform.h new file mode 100644 index 000000000000..20da3e3b89eb --- /dev/null +++ b/llvm/include/llvm/ExecutionEngine/Orc/ELFNixPlatform.h @@ -0,0 +1,330 @@ +//===-- ELFNixPlatform.h -- Utilities for executing ELF in Orc --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Linux/BSD support for executing JIT'd ELF in Orc. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_ORC_ELFNIXPLATFORM_H +#define LLVM_EXECUTIONENGINE_ORC_ELFNIXPLATFORM_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/ExecutionEngine/Orc/Core.h" +#include "llvm/ExecutionEngine/Orc/ExecutorProcessControl.h" +#include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h" +#include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h" + +#include +#include +#include + +namespace llvm { +namespace orc { + +struct ELFPerObjectSectionsToRegister { + ExecutorAddrRange EHFrameSection; + ExecutorAddrRange ThreadDataSection; +}; + +struct ELFNixJITDylibInitializers { + using SectionList = std::vector; + + ELFNixJITDylibInitializers(std::string Name, ExecutorAddr DSOHandleAddress) + : Name(std::move(Name)), DSOHandleAddress(std::move(DSOHandleAddress)) {} + + std::string Name; + ExecutorAddr DSOHandleAddress; + + StringMap InitSections; +}; + +class ELFNixJITDylibDeinitializers {}; + +using ELFNixJITDylibInitializerSequence = + std::vector; + +using ELFNixJITDylibDeinitializerSequence = + std::vector; + +/// Mediates between ELFNix initialization and ExecutionSession state. +class ELFNixPlatform : public Platform { +public: + /// Try to create a ELFNixPlatform instance, adding the ORC runtime to the + /// given JITDylib. + /// + /// The ORC runtime requires access to a number of symbols in + /// libc++. It is up to the caller to ensure that the requried + /// symbols can be referenced by code added to PlatformJD. 
The + /// standard way to achieve this is to first attach dynamic library + /// search generators for either the given process, or for the + /// specific required libraries, to PlatformJD, then to create the + /// platform instance: + /// + /// \code{.cpp} + /// auto &PlatformJD = ES.createBareJITDylib("stdlib"); + /// PlatformJD.addGenerator( + /// ExitOnErr(EPCDynamicLibrarySearchGenerator + /// ::GetForTargetProcess(EPC))); + /// ES.setPlatform( + /// ExitOnErr(ELFNixPlatform::Create(ES, ObjLayer, EPC, PlatformJD, + /// "/path/to/orc/runtime"))); + /// \endcode + /// + /// Alternatively, these symbols could be added to another JITDylib that + /// PlatformJD links against. + /// + /// Clients are also responsible for ensuring that any JIT'd code that + /// depends on runtime functions (including any code using TLV or static + /// destructors) can reference the runtime symbols. This is usually achieved + /// by linking any JITDylibs containing regular code against + /// PlatformJD. + /// + /// By default, ELFNixPlatform will add the set of aliases returned by the + /// standardPlatformAliases function. This includes both required aliases + /// (e.g. __cxa_atexit -> __orc_rt_elf_cxa_atexit for static destructor + /// support), and optional aliases that provide JIT versions of common + /// functions (e.g. dlopen -> __orc_rt_elf_jit_dlopen). Clients can + /// override these defaults by passing a non-None value for the + /// RuntimeAliases function, in which case the client is responsible for + /// setting up all aliases (including the required ones). + static Expected> + Create(ExecutionSession &ES, ObjectLinkingLayer &ObjLinkingLayer, + JITDylib &PlatformJD, const char *OrcRuntimePath, + Optional RuntimeAliases = None); + + ExecutionSession &getExecutionSession() const { return ES; } + ObjectLinkingLayer &getObjectLinkingLayer() const { return ObjLinkingLayer; } + + Error setupJITDylib(JITDylib &JD) override; + Error notifyAdding(ResourceTracker &RT, + const MaterializationUnit &MU) override; + Error notifyRemoving(ResourceTracker &RT) override; + + /// Returns an AliasMap containing the default aliases for the ELFNixPlatform. + /// This can be modified by clients when constructing the platform to add + /// or remove aliases. + static SymbolAliasMap standardPlatformAliases(ExecutionSession &ES); + + /// Returns the array of required CXX aliases. + static ArrayRef> requiredCXXAliases(); + + /// Returns the array of standard runtime utility aliases for ELF. + static ArrayRef> + standardRuntimeUtilityAliases(); + + /// Returns true if the given section name is an initializer section. + static bool isInitializerSection(StringRef SecName); + +private: + // The ELFNixPlatformPlugin scans/modifies LinkGraphs to support ELF + // platform features including initializers, exceptions, TLV, and language + // runtime registration. + class ELFNixPlatformPlugin : public ObjectLinkingLayer::Plugin { + public: + ELFNixPlatformPlugin(ELFNixPlatform &MP) : MP(MP) {} + + void modifyPassConfig(MaterializationResponsibility &MR, + jitlink::LinkGraph &G, + jitlink::PassConfiguration &Config) override; + + SyntheticSymbolDependenciesMap + getSyntheticSymbolDependencies(MaterializationResponsibility &MR) override; + + // FIXME: We should be tentatively tracking scraped sections and discarding + // if the MR fails. 
+ Error notifyFailed(MaterializationResponsibility &MR) override { + return Error::success(); + } + + Error notifyRemovingResources(ResourceKey K) override { + return Error::success(); + } + + void notifyTransferringResources(ResourceKey DstKey, + ResourceKey SrcKey) override {} + + private: + using InitSymbolDepMap = + DenseMap; + + void addInitializerSupportPasses(MaterializationResponsibility &MR, + jitlink::PassConfiguration &Config); + + void addDSOHandleSupportPasses(MaterializationResponsibility &MR, + jitlink::PassConfiguration &Config); + + void addEHAndTLVSupportPasses(MaterializationResponsibility &MR, + jitlink::PassConfiguration &Config); + + Error preserveInitSections(jitlink::LinkGraph &G, + MaterializationResponsibility &MR); + + Error registerInitSections(jitlink::LinkGraph &G, JITDylib &JD); + + Error fixTLVSectionsAndEdges(jitlink::LinkGraph &G, JITDylib &JD); + + std::mutex PluginMutex; + ELFNixPlatform &MP; + InitSymbolDepMap InitSymbolDeps; + }; + + using SendInitializerSequenceFn = + unique_function)>; + + using SendDeinitializerSequenceFn = + unique_function)>; + + using SendSymbolAddressFn = unique_function)>; + + static bool supportedTarget(const Triple &TT); + + ELFNixPlatform(ExecutionSession &ES, ObjectLinkingLayer &ObjLinkingLayer, + JITDylib &PlatformJD, + std::unique_ptr OrcRuntimeGenerator, + Error &Err); + + // Associate ELFNixPlatform JIT-side runtime support functions with handlers. + Error associateRuntimeSupportFunctions(JITDylib &PlatformJD); + + void getInitializersBuildSequencePhase(SendInitializerSequenceFn SendResult, + JITDylib &JD, + std::vector DFSLinkOrder); + + void getInitializersLookupPhase(SendInitializerSequenceFn SendResult, + JITDylib &JD); + + void rt_getInitializers(SendInitializerSequenceFn SendResult, + StringRef JDName); + + void rt_getDeinitializers(SendDeinitializerSequenceFn SendResult, + ExecutorAddr Handle); + + void rt_lookupSymbol(SendSymbolAddressFn SendResult, ExecutorAddr Handle, + StringRef SymbolName); + + // Records the addresses of runtime symbols used by the platform. + Error bootstrapELFNixRuntime(JITDylib &PlatformJD); + + Error registerInitInfo(JITDylib &JD, + ArrayRef InitSections); + + Error registerPerObjectSections(const ELFPerObjectSectionsToRegister &POSR); + + Expected createPThreadKey(); + + ExecutionSession &ES; + ObjectLinkingLayer &ObjLinkingLayer; + + SymbolStringPtr DSOHandleSymbol; + std::atomic RuntimeBootstrapped{false}; + + ExecutorAddr orc_rt_elfnix_platform_bootstrap; + ExecutorAddr orc_rt_elfnix_platform_shutdown; + ExecutorAddr orc_rt_elfnix_register_object_sections; + ExecutorAddr orc_rt_elfnix_create_pthread_key; + + DenseMap RegisteredInitSymbols; + + // InitSeqs gets its own mutex to avoid locking the whole session when + // aggregating data from the jitlink. 
+ std::mutex PlatformMutex; + DenseMap InitSeqs; + std::vector BootstrapPOSRs; + + DenseMap HandleAddrToJITDylib; + DenseMap JITDylibToPThreadKey; +}; + +namespace shared { + +using SPSELFPerObjectSectionsToRegister = + SPSTuple; + +template <> +class SPSSerializationTraits { + +public: + static size_t size(const ELFPerObjectSectionsToRegister &MOPOSR) { + return SPSELFPerObjectSectionsToRegister::AsArgList::size( + MOPOSR.EHFrameSection, MOPOSR.ThreadDataSection); + } + + static bool serialize(SPSOutputBuffer &OB, + const ELFPerObjectSectionsToRegister &MOPOSR) { + return SPSELFPerObjectSectionsToRegister::AsArgList::serialize( + OB, MOPOSR.EHFrameSection, MOPOSR.ThreadDataSection); + } + + static bool deserialize(SPSInputBuffer &IB, + ELFPerObjectSectionsToRegister &MOPOSR) { + return SPSELFPerObjectSectionsToRegister::AsArgList::deserialize( + IB, MOPOSR.EHFrameSection, MOPOSR.ThreadDataSection); + } +}; + +using SPSNamedExecutorAddrRangeSequenceMap = + SPSSequence>; + +using SPSELFNixJITDylibInitializers = + SPSTuple; + +using SPSELFNixJITDylibInitializerSequence = + SPSSequence; + +/// Serialization traits for ELFNixJITDylibInitializers. +template <> +class SPSSerializationTraits { +public: + static size_t size(const ELFNixJITDylibInitializers &MOJDIs) { + return SPSELFNixJITDylibInitializers::AsArgList::size( + MOJDIs.Name, MOJDIs.DSOHandleAddress, MOJDIs.InitSections); + } + + static bool serialize(SPSOutputBuffer &OB, + const ELFNixJITDylibInitializers &MOJDIs) { + return SPSELFNixJITDylibInitializers::AsArgList::serialize( + OB, MOJDIs.Name, MOJDIs.DSOHandleAddress, MOJDIs.InitSections); + } + + static bool deserialize(SPSInputBuffer &IB, + ELFNixJITDylibInitializers &MOJDIs) { + return SPSELFNixJITDylibInitializers::AsArgList::deserialize( + IB, MOJDIs.Name, MOJDIs.DSOHandleAddress, MOJDIs.InitSections); + } +}; + +using SPSELFJITDylibDeinitializers = SPSEmpty; + +using SPSELFJITDylibDeinitializerSequence = + SPSSequence; + +template <> +class SPSSerializationTraits { +public: + static size_t size(const ELFNixJITDylibDeinitializers &MOJDDs) { return 0; } + + static bool serialize(SPSOutputBuffer &OB, + const ELFNixJITDylibDeinitializers &MOJDDs) { + return true; + } + + static bool deserialize(SPSInputBuffer &IB, + ELFNixJITDylibDeinitializers &MOJDDs) { + MOJDDs = ELFNixJITDylibDeinitializers(); + return true; + } +}; + +} // end namespace shared +} // end namespace orc +} // end namespace llvm + +#endif // LLVM_EXECUTIONENGINE_ORC_ELFNIXPLATFORM_H diff --git a/llvm/include/llvm/ExecutionEngine/Orc/EPCDebugObjectRegistrar.h b/llvm/include/llvm/ExecutionEngine/Orc/EPCDebugObjectRegistrar.h index 410a202b3296..940d0d28ae83 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/EPCDebugObjectRegistrar.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/EPCDebugObjectRegistrar.h @@ -14,6 +14,7 @@ #define LLVM_EXECUTIONENGINE_ORC_EPCDEBUGOBJECTREGISTRAR_H #include "llvm/ExecutionEngine/JITSymbol.h" +#include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h" #include "llvm/ExecutionEngine/Orc/Shared/WrapperFunctionUtils.h" #include "llvm/Support/Error.h" #include "llvm/Support/Memory.h" @@ -32,7 +33,7 @@ class ExecutionSession; /// Abstract interface for registering debug objects in the executor process. class DebugObjectRegistrar { public: - virtual Error registerDebugObject(sys::MemoryBlock) = 0; + virtual Error registerDebugObject(ExecutorAddrRange TargetMem) = 0; virtual ~DebugObjectRegistrar() {} }; @@ -40,14 +41,14 @@ public: /// executor process. 
class EPCDebugObjectRegistrar : public DebugObjectRegistrar { public: - EPCDebugObjectRegistrar(ExecutionSession &ES, JITTargetAddress RegisterFn) + EPCDebugObjectRegistrar(ExecutionSession &ES, ExecutorAddr RegisterFn) : ES(ES), RegisterFn(RegisterFn) {} - Error registerDebugObject(sys::MemoryBlock TargetMem) override; + Error registerDebugObject(ExecutorAddrRange TargetMem) override; private: ExecutionSession &ES; - JITTargetAddress RegisterFn; + ExecutorAddr RegisterFn; }; /// Create a ExecutorProcessControl-based DebugObjectRegistrar that emits debug diff --git a/llvm/include/llvm/ExecutionEngine/Orc/EPCEHFrameRegistrar.h b/llvm/include/llvm/ExecutionEngine/Orc/EPCEHFrameRegistrar.h index 8cd6e9319a28..6d113a7bdf1a 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/EPCEHFrameRegistrar.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/EPCEHFrameRegistrar.h @@ -14,6 +14,7 @@ #define LLVM_EXECUTIONENGINE_ORC_EPCEHFRAMEREGISTRAR_H #include "llvm/ExecutionEngine/JITLink/EHFrameSupport.h" +#include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h" namespace llvm { namespace orc { @@ -33,8 +34,8 @@ public: /// Create a EPCEHFrameRegistrar with the given ExecutorProcessControl /// object and registration/deregistration function addresses. EPCEHFrameRegistrar(ExecutionSession &ES, - JITTargetAddress RegisterEHFrameWrapperFnAddr, - JITTargetAddress DeregisterEHFRameWrapperFnAddr) + ExecutorAddr RegisterEHFrameWrapperFnAddr, + ExecutorAddr DeregisterEHFRameWrapperFnAddr) : ES(ES), RegisterEHFrameWrapperFnAddr(RegisterEHFrameWrapperFnAddr), DeregisterEHFrameWrapperFnAddr(DeregisterEHFRameWrapperFnAddr) {} @@ -45,8 +46,8 @@ public: private: ExecutionSession &ES; - JITTargetAddress RegisterEHFrameWrapperFnAddr; - JITTargetAddress DeregisterEHFrameWrapperFnAddr; + ExecutorAddr RegisterEHFrameWrapperFnAddr; + ExecutorAddr DeregisterEHFrameWrapperFnAddr; }; } // end namespace orc diff --git a/llvm/include/llvm/ExecutionEngine/Orc/EPCGenericDylibManager.h b/llvm/include/llvm/ExecutionEngine/Orc/EPCGenericDylibManager.h new file mode 100644 index 000000000000..02e580c86f54 --- /dev/null +++ b/llvm/include/llvm/ExecutionEngine/Orc/EPCGenericDylibManager.h @@ -0,0 +1,67 @@ +//===- EPCGenericDylibManager.h -- Generic EPC Dylib management -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Implements dylib loading and searching by making calls to +// ExecutorProcessControl::callWrapper. +// +// This simplifies the implementaton of new ExecutorProcessControl instances, +// as this implementation will always work (at the cost of some performance +// overhead for the calls). +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_ORC_EPCGENERICDYLIBMANAGER_H +#define LLVM_EXECUTIONENGINE_ORC_EPCGENERICDYLIBMANAGER_H + +#include "llvm/ExecutionEngine/Orc/ExecutorProcessControl.h" +#include "llvm/ExecutionEngine/Orc/Shared/SimpleRemoteEPCUtils.h" + +namespace llvm { +namespace orc { + +class SymbolLookupSet; + +class EPCGenericDylibManager { +public: + /// Function addresses for memory access. + struct SymbolAddrs { + ExecutorAddr Instance; + ExecutorAddr Open; + ExecutorAddr Lookup; + }; + + /// Create an EPCGenericMemoryAccess instance from a given set of + /// function addrs. 
+ static Expected + CreateWithDefaultBootstrapSymbols(ExecutorProcessControl &EPC); + + /// Create an EPCGenericMemoryAccess instance from a given set of + /// function addrs. + EPCGenericDylibManager(ExecutorProcessControl &EPC, SymbolAddrs SAs) + : EPC(EPC), SAs(SAs) {} + + /// Loads the dylib with the given name. + Expected open(StringRef Path, uint64_t Mode); + + /// Looks up symbols within the given dylib. + Expected> lookup(tpctypes::DylibHandle H, + const SymbolLookupSet &Lookup); + + /// Looks up symbols within the given dylib. + Expected> + lookup(tpctypes::DylibHandle H, const RemoteSymbolLookupSet &Lookup); + +private: + ExecutorProcessControl &EPC; + SymbolAddrs SAs; +}; + +} // end namespace orc +} // end namespace llvm + +#endif // LLVM_EXECUTIONENGINE_ORC_EPCGENERICDYLIBMANAGER_H diff --git a/llvm/include/llvm/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.h b/llvm/include/llvm/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.h new file mode 100644 index 000000000000..b9825f17ec17 --- /dev/null +++ b/llvm/include/llvm/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.h @@ -0,0 +1,97 @@ +//===- EPCGenericJITLinkMemoryManager.h - EPC-based mem manager -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Implements JITLinkMemoryManager by making remove calls via +// ExecutorProcessControl::callWrapperAsync. +// +// This simplifies the implementaton of new ExecutorProcessControl instances, +// as this implementation will always work (at the cost of some performance +// overhead for the calls). +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_ORC_EPCGENERICJITLINKMEMORYMANAGER_H +#define LLVM_EXECUTIONENGINE_ORC_EPCGENERICJITLINKMEMORYMANAGER_H + +#include "llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h" +#include "llvm/ExecutionEngine/Orc/Core.h" + +namespace llvm { +namespace orc { + +class EPCGenericJITLinkMemoryManager : public jitlink::JITLinkMemoryManager { +public: + /// Function addresses for memory access. + struct SymbolAddrs { + ExecutorAddr Allocator; + ExecutorAddr Reserve; + ExecutorAddr Finalize; + ExecutorAddr Deallocate; + }; + + /// Create an EPCGenericJITLinkMemoryManager instance from a given set of + /// function addrs. + EPCGenericJITLinkMemoryManager(ExecutorProcessControl &EPC, SymbolAddrs SAs) + : EPC(EPC), SAs(SAs) {} + + void allocate(const jitlink::JITLinkDylib *JD, jitlink::LinkGraph &G, + OnAllocatedFunction OnAllocated) override; + + // Use overloads from base class. + using JITLinkMemoryManager::allocate; + + void deallocate(std::vector Allocs, + OnDeallocatedFunction OnDeallocated) override; + + // Use overloads from base class. + using JITLinkMemoryManager::deallocate; + +private: + class InFlightAlloc; + + void completeAllocation(ExecutorAddr AllocAddr, jitlink::BasicLayout BL, + OnAllocatedFunction OnAllocated); + + ExecutorProcessControl &EPC; + SymbolAddrs SAs; +}; + +namespace shared { + +/// FIXME: This specialization should be moved into TargetProcessControlTypes.h +/// (or whereever those types get merged to) once ORC depends on JITLink. 
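A hedged sketch of using the EPCGenericDylibManager declared above to load a library in the executor and look up one symbol. The empty path, the Mode value, and the treatment of the result (whose element type is elided in the listing) are illustrative assumptions:

#include "llvm/ExecutionEngine/Orc/Core.h"
#include "llvm/ExecutionEngine/Orc/EPCGenericDylibManager.h"

using namespace llvm;
using namespace llvm::orc;

Error loadAndLookup(ExecutorProcessControl &EPC, StringRef SymName) {
  // Resolve the remote-side helper functions from the EPC's bootstrap symbols.
  auto DylibMgr =
      EPCGenericDylibManager::CreateWithDefaultBootstrapSymbols(EPC);
  if (!DylibMgr)
    return DylibMgr.takeError();

  // Open the executor's main program (an empty path is commonly used for
  // this); Mode is assumed to take dlopen-style flags, 0 for defaults.
  auto H = DylibMgr->open("", 0);
  if (!H)
    return H.takeError();

  // Look up one required symbol; the element type of the returned vector is
  // elided in the listing above, so it is left to auto here.
  SymbolLookupSet LS;
  LS.add(EPC.intern(SymName));
  auto Result = DylibMgr->lookup(*H, LS);
  if (!Result)
    return Result.takeError();
  return Error::success();
}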
+template <> +class SPSSerializationTraits { +public: + static size_t size(const jitlink::JITLinkMemoryManager::FinalizedAlloc &FA) { + return SPSArgList::size(ExecutorAddr(FA.getAddress())); + } + + static bool + serialize(SPSOutputBuffer &OB, + const jitlink::JITLinkMemoryManager::FinalizedAlloc &FA) { + return SPSArgList::serialize( + OB, ExecutorAddr(FA.getAddress())); + } + + static bool deserialize(SPSInputBuffer &IB, + jitlink::JITLinkMemoryManager::FinalizedAlloc &FA) { + ExecutorAddr A; + if (!SPSArgList::deserialize(IB, A)) + return false; + FA = jitlink::JITLinkMemoryManager::FinalizedAlloc(A.getValue()); + return true; + } +}; + +} // end namespace shared +} // end namespace orc +} // end namespace llvm + +#endif // LLVM_EXECUTIONENGINE_ORC_EPCGENERICJITLINKMEMORYMANAGER_H diff --git a/llvm/include/llvm/ExecutionEngine/Orc/EPCGenericMemoryAccess.h b/llvm/include/llvm/ExecutionEngine/Orc/EPCGenericMemoryAccess.h new file mode 100644 index 000000000000..8c1d457d06ab --- /dev/null +++ b/llvm/include/llvm/ExecutionEngine/Orc/EPCGenericMemoryAccess.h @@ -0,0 +1,85 @@ +//===- EPCGenericMemoryAccess.h - Generic EPC MemoryAccess impl -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Implements ExecutorProcessControl::MemoryAccess by making calls to +// ExecutorProcessControl::callWrapperAsync. +// +// This simplifies the implementaton of new ExecutorProcessControl instances, +// as this implementation will always work (at the cost of some performance +// overhead for the calls). +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_ORC_EPCGENERICMEMORYACCESS_H +#define LLVM_EXECUTIONENGINE_ORC_EPCGENERICMEMORYACCESS_H + +#include "llvm/ExecutionEngine/Orc/Core.h" + +namespace llvm { +namespace orc { + +class EPCGenericMemoryAccess : public ExecutorProcessControl::MemoryAccess { +public: + /// Function addresses for memory access. + struct FuncAddrs { + ExecutorAddr WriteUInt8s; + ExecutorAddr WriteUInt16s; + ExecutorAddr WriteUInt32s; + ExecutorAddr WriteUInt64s; + ExecutorAddr WriteBuffers; + }; + + /// Create an EPCGenericMemoryAccess instance from a given set of + /// function addrs. 
+ EPCGenericMemoryAccess(ExecutorProcessControl &EPC, FuncAddrs FAs) + : EPC(EPC), FAs(FAs) {} + + void writeUInt8sAsync(ArrayRef Ws, + WriteResultFn OnWriteComplete) override { + using namespace shared; + EPC.callSPSWrapperAsync)>( + FAs.WriteUInt8s, std::move(OnWriteComplete), Ws); + } + + void writeUInt16sAsync(ArrayRef Ws, + WriteResultFn OnWriteComplete) override { + using namespace shared; + EPC.callSPSWrapperAsync)>( + FAs.WriteUInt16s, std::move(OnWriteComplete), Ws); + } + + void writeUInt32sAsync(ArrayRef Ws, + WriteResultFn OnWriteComplete) override { + using namespace shared; + EPC.callSPSWrapperAsync)>( + FAs.WriteUInt32s, std::move(OnWriteComplete), Ws); + } + + void writeUInt64sAsync(ArrayRef Ws, + WriteResultFn OnWriteComplete) override { + using namespace shared; + EPC.callSPSWrapperAsync)>( + FAs.WriteUInt64s, std::move(OnWriteComplete), Ws); + } + + void writeBuffersAsync(ArrayRef Ws, + WriteResultFn OnWriteComplete) override { + using namespace shared; + EPC.callSPSWrapperAsync)>( + FAs.WriteBuffers, std::move(OnWriteComplete), Ws); + } + +private: + ExecutorProcessControl &EPC; + FuncAddrs FAs; +}; + +} // end namespace orc +} // end namespace llvm + +#endif // LLVM_EXECUTIONENGINE_ORC_EPCGENERICMEMORYACCESS_H diff --git a/llvm/include/llvm/ExecutionEngine/Orc/EPCGenericRTDyldMemoryManager.h b/llvm/include/llvm/ExecutionEngine/Orc/EPCGenericRTDyldMemoryManager.h new file mode 100644 index 000000000000..b6fdfb92ced3 --- /dev/null +++ b/llvm/include/llvm/ExecutionEngine/Orc/EPCGenericRTDyldMemoryManager.h @@ -0,0 +1,133 @@ +//===---- EPCGenericRTDyldMemoryManager.h - EPC-based MemMgr ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Defines a RuntimeDyld::MemoryManager that uses EPC and the ORC runtime +// bootstrap functions. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_ORC_EPCGENERICRTDYLDMEMORYMANAGER_H +#define LLVM_EXECUTIONENGINE_ORC_EPCGENERICRTDYLDMEMORYMANAGER_H + +#include "llvm/ExecutionEngine/Orc/ExecutorProcessControl.h" +#include "llvm/ExecutionEngine/RuntimeDyld.h" + +#define DEBUG_TYPE "orc" + +namespace llvm { +namespace orc { + +/// Remote-mapped RuntimeDyld-compatible memory manager. +class EPCGenericRTDyldMemoryManager : public RuntimeDyld::MemoryManager { +public: + /// Symbol addresses for memory access. + struct SymbolAddrs { + ExecutorAddr Instance; + ExecutorAddr Reserve; + ExecutorAddr Finalize; + ExecutorAddr Deallocate; + ExecutorAddr RegisterEHFrame; + ExecutorAddr DeregisterEHFrame; + }; + + /// Create an EPCGenericRTDyldMemoryManager using the given EPC, looking up + /// the default symbol names in the bootstrap symbol set. + static Expected> + CreateWithDefaultBootstrapSymbols(ExecutorProcessControl &EPC); + + /// Create an EPCGenericRTDyldMemoryManager using the given EPC and symbol + /// addrs. 
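A sketch of wiring the EPCGenericRTDyldMemoryManager declared above into an RTDyldObjectLinkingLayer, which requests one memory manager per emitted object. The layer constructor shape is assumed from existing ORC usage rather than from this patch, and EPC must outlive the layer:

#include "llvm/ExecutionEngine/Orc/EPCGenericRTDyldMemoryManager.h"
#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"

using namespace llvm;
using namespace llvm::orc;

Expected<std::unique_ptr<RTDyldObjectLinkingLayer>>
makeRemoteRTDyldLayer(ExecutionSession &ES, ExecutorProcessControl &EPC) {
  // Check up front that the bootstrap symbols are available, so the per-object
  // creator below can use cantFail.
  auto Probe =
      EPCGenericRTDyldMemoryManager::CreateWithDefaultBootstrapSymbols(EPC);
  if (!Probe)
    return Probe.takeError();

  // RTDyldObjectLinkingLayer asks for a fresh memory manager for each object.
  return std::make_unique<RTDyldObjectLinkingLayer>(ES, [&EPC]() {
    return cantFail(
        EPCGenericRTDyldMemoryManager::CreateWithDefaultBootstrapSymbols(EPC));
  });
}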
+ EPCGenericRTDyldMemoryManager(ExecutorProcessControl &EPC, SymbolAddrs SAs); + + EPCGenericRTDyldMemoryManager(const EPCGenericRTDyldMemoryManager &) = delete; + EPCGenericRTDyldMemoryManager & + operator=(const EPCGenericRTDyldMemoryManager &) = delete; + EPCGenericRTDyldMemoryManager(EPCGenericRTDyldMemoryManager &&) = delete; + EPCGenericRTDyldMemoryManager & + operator=(EPCGenericRTDyldMemoryManager &&) = delete; + ~EPCGenericRTDyldMemoryManager(); + + uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment, + unsigned SectionID, + StringRef SectionName) override; + + uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment, + unsigned SectionID, StringRef SectionName, + bool IsReadOnly) override; + + void reserveAllocationSpace(uintptr_t CodeSize, uint32_t CodeAlign, + uintptr_t RODataSize, uint32_t RODataAlign, + uintptr_t RWDataSize, + uint32_t RWDataAlign) override; + + bool needsToReserveAllocationSpace() override; + + void registerEHFrames(uint8_t *Addr, uint64_t LoadAddr, size_t Size) override; + + void deregisterEHFrames() override; + + void notifyObjectLoaded(RuntimeDyld &Dyld, + const object::ObjectFile &Obj) override; + + bool finalizeMemory(std::string *ErrMsg = nullptr) override; + +private: + struct Alloc { + public: + Alloc(uint64_t Size, unsigned Align) + : Size(Size), Align(Align), + Contents(std::make_unique(Size + Align - 1)) {} + + uint64_t Size; + unsigned Align; + std::unique_ptr Contents; + ExecutorAddr RemoteAddr; + }; + + struct EHFrame { + ExecutorAddr Addr; + uint64_t Size; + }; + + // Group of section allocations to be allocated together in the executor. The + // RemoteCodeAddr will stand in as the id of the group for deallocation + // purposes. + struct AllocGroup { + AllocGroup() = default; + AllocGroup(const AllocGroup &) = delete; + AllocGroup &operator=(const AllocGroup &) = delete; + AllocGroup(AllocGroup &&) = default; + AllocGroup &operator=(AllocGroup &&) = default; + + ExecutorAddrRange RemoteCode; + ExecutorAddrRange RemoteROData; + ExecutorAddrRange RemoteRWData; + std::vector UnfinalizedEHFrames; + std::vector CodeAllocs, RODataAllocs, RWDataAllocs; + }; + + // Maps all allocations in Allocs to aligned blocks + void mapAllocsToRemoteAddrs(RuntimeDyld &Dyld, std::vector &Allocs, + ExecutorAddr NextAddr); + + ExecutorProcessControl &EPC; + SymbolAddrs SAs; + + std::mutex M; + std::vector Unmapped; + std::vector Unfinalized; + std::vector FinalizedAllocs; + std::string ErrMsg; +}; + +} // end namespace orc +} // end namespace llvm + +#undef DEBUG_TYPE + +#endif // LLVM_EXECUTIONENGINE_ORC_EPCGENERICRTDYLDMEMORYMANAGER_H diff --git a/llvm/include/llvm/ExecutionEngine/Orc/EPCIndirectionUtils.h b/llvm/include/llvm/ExecutionEngine/Orc/EPCIndirectionUtils.h index 64f16d507c97..92de5882bafe 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/EPCIndirectionUtils.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/EPCIndirectionUtils.h @@ -126,7 +126,7 @@ public: } private: - using Allocation = jitlink::JITLinkMemoryManager::Allocation; + using FinalizedAlloc = jitlink::JITLinkMemoryManager::FinalizedAlloc; struct IndirectStubInfo { IndirectStubInfo() = default; @@ -149,12 +149,12 @@ private: ExecutorProcessControl &EPC; std::unique_ptr ABI; JITTargetAddress ResolverBlockAddr; - std::unique_ptr ResolverBlock; + FinalizedAlloc ResolverBlock; std::unique_ptr TP; std::unique_ptr LCTM; std::vector AvailableIndirectStubs; - std::vector> IndirectStubAllocs; + std::vector IndirectStubAllocs; }; /// This will call writeResolver on the given 
EPCIndirectionUtils instance diff --git a/llvm/include/llvm/ExecutionEngine/Orc/ExecutorProcessControl.h b/llvm/include/llvm/ExecutionEngine/Orc/ExecutorProcessControl.h index d540d0cd0608..105dac8e8d04 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/ExecutorProcessControl.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/ExecutorProcessControl.h @@ -13,7 +13,6 @@ #ifndef LLVM_EXECUTIONENGINE_ORC_EXECUTORPROCESSCONTROL_H #define LLVM_EXECUTIONENGINE_ORC_EXECUTORPROCESSCONTROL_H -#include "llvm/ADT/Optional.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h" @@ -21,6 +20,7 @@ #include "llvm/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.h" #include "llvm/ExecutionEngine/Orc/Shared/WrapperFunctionUtils.h" #include "llvm/ExecutionEngine/Orc/SymbolStringPool.h" +#include "llvm/ExecutionEngine/Orc/TaskDispatch.h" #include "llvm/Support/DynamicLibrary.h" #include "llvm/Support/MSVCErrorWorkarounds.h" @@ -37,11 +37,65 @@ class SymbolLookupSet; /// ExecutorProcessControl supports interaction with a JIT target process. class ExecutorProcessControl { friend class ExecutionSession; - public: - /// Sender to return the result of a WrapperFunction executed in the JIT. - using SendResultFunction = - unique_function; + + /// A handler or incoming WrapperFunctionResults -- either return values from + /// callWrapper* calls, or incoming JIT-dispatch requests. + /// + /// IncomingWFRHandlers are constructible from + /// unique_functions using the + /// runInPlace function or a RunWithDispatch object. + class IncomingWFRHandler { + friend class ExecutorProcessControl; + public: + IncomingWFRHandler() = default; + explicit operator bool() const { return !!H; } + void operator()(shared::WrapperFunctionResult WFR) { H(std::move(WFR)); } + private: + template IncomingWFRHandler(FnT &&Fn) + : H(std::forward(Fn)) {} + + unique_function H; + }; + + /// Constructs an IncomingWFRHandler from a function object that is callable + /// as void(shared::WrapperFunctionResult). The function object will be called + /// directly. This should be used with care as it may block listener threads + /// in remote EPCs. It is only suitable for simple tasks (e.g. setting a + /// future), or for performing some quick analysis before dispatching "real" + /// work as a Task. + class RunInPlace { + public: + template + IncomingWFRHandler operator()(FnT &&Fn) { + return IncomingWFRHandler(std::forward(Fn)); + } + }; + + /// Constructs an IncomingWFRHandler from a function object by creating a new + /// function object that dispatches the original using a TaskDispatcher, + /// wrapping the original as a GenericNamedTask. + /// + /// This is the default approach for running WFR handlers. + class RunAsTask { + public: + RunAsTask(TaskDispatcher &D) : D(D) {} + + template + IncomingWFRHandler operator()(FnT &&Fn) { + return IncomingWFRHandler( + [&D = this->D, Fn = std::move(Fn)] + (shared::WrapperFunctionResult WFR) mutable { + D.dispatch( + makeGenericNamedTask( + [Fn = std::move(Fn), WFR = std::move(WFR)]() mutable { + Fn(std::move(WFR)); + }, "WFR handler task")); + }); + } + private: + TaskDispatcher &D; + }; /// APIs for manipulating memory in the target process. 
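For example, the blocking convenience wrappers on this class can be used to poke a single word into the target. This is only a sketch: Addr is assumed to be a valid executor-side address, in whatever form tpctypes::UInt32Write expects.

\code{.cpp}
  // Write the 32-bit value 42 to Addr in the executor and wait for the
  // write to complete.
  if (auto Err = EPC.getMemoryAccess().writeUInt32s(
          tpctypes::UInt32Write(Addr, 42)))
    return Err;
\endcode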
class MemoryAccess { @@ -51,53 +105,58 @@ public: virtual ~MemoryAccess(); - virtual void writeUInt8s(ArrayRef Ws, - WriteResultFn OnWriteComplete) = 0; + virtual void writeUInt8sAsync(ArrayRef Ws, + WriteResultFn OnWriteComplete) = 0; - virtual void writeUInt16s(ArrayRef Ws, - WriteResultFn OnWriteComplete) = 0; + virtual void writeUInt16sAsync(ArrayRef Ws, + WriteResultFn OnWriteComplete) = 0; - virtual void writeUInt32s(ArrayRef Ws, - WriteResultFn OnWriteComplete) = 0; + virtual void writeUInt32sAsync(ArrayRef Ws, + WriteResultFn OnWriteComplete) = 0; - virtual void writeUInt64s(ArrayRef Ws, - WriteResultFn OnWriteComplete) = 0; + virtual void writeUInt64sAsync(ArrayRef Ws, + WriteResultFn OnWriteComplete) = 0; - virtual void writeBuffers(ArrayRef Ws, - WriteResultFn OnWriteComplete) = 0; + virtual void writeBuffersAsync(ArrayRef Ws, + WriteResultFn OnWriteComplete) = 0; Error writeUInt8s(ArrayRef Ws) { std::promise ResultP; auto ResultF = ResultP.get_future(); - writeUInt8s(Ws, [&](Error Err) { ResultP.set_value(std::move(Err)); }); + writeUInt8sAsync(Ws, + [&](Error Err) { ResultP.set_value(std::move(Err)); }); return ResultF.get(); } Error writeUInt16s(ArrayRef Ws) { std::promise ResultP; auto ResultF = ResultP.get_future(); - writeUInt16s(Ws, [&](Error Err) { ResultP.set_value(std::move(Err)); }); + writeUInt16sAsync(Ws, + [&](Error Err) { ResultP.set_value(std::move(Err)); }); return ResultF.get(); } Error writeUInt32s(ArrayRef Ws) { std::promise ResultP; auto ResultF = ResultP.get_future(); - writeUInt32s(Ws, [&](Error Err) { ResultP.set_value(std::move(Err)); }); + writeUInt32sAsync(Ws, + [&](Error Err) { ResultP.set_value(std::move(Err)); }); return ResultF.get(); } Error writeUInt64s(ArrayRef Ws) { std::promise ResultP; auto ResultF = ResultP.get_future(); - writeUInt64s(Ws, [&](Error Err) { ResultP.set_value(std::move(Err)); }); + writeUInt64sAsync(Ws, + [&](Error Err) { ResultP.set_value(std::move(Err)); }); return ResultF.get(); } Error writeBuffers(ArrayRef Ws) { std::promise ResultP; auto ResultF = ResultP.get_future(); - writeBuffers(Ws, [&](Error Err) { ResultP.set_value(std::move(Err)); }); + writeBuffersAsync(Ws, + [&](Error Err) { ResultP.set_value(std::move(Err)); }); return ResultF.get(); } }; @@ -113,10 +172,14 @@ public: /// Contains the address of the dispatch function and context that the ORC /// runtime can use to call functions in the JIT. struct JITDispatchInfo { - ExecutorAddress JITDispatchFunctionAddress; - ExecutorAddress JITDispatchContextAddress; + ExecutorAddr JITDispatchFunction; + ExecutorAddr JITDispatchContext; }; + ExecutorProcessControl(std::shared_ptr SSP, + std::unique_ptr D) + : SSP(std::move(SSP)), D(std::move(D)) {} + virtual ~ExecutorProcessControl(); /// Return the ExecutionSession associated with this instance. @@ -132,6 +195,8 @@ public: /// Return a shared pointer to the SymbolStringPool for this instance. std::shared_ptr getSymbolStringPool() const { return SSP; } + TaskDispatcher &getDispatcher() { return *D; } + /// Return the Triple for the target process. const Triple &getTargetTriple() const { return TargetTriple; } @@ -153,6 +218,29 @@ public: return *MemMgr; } + /// Returns the bootstrap symbol map. + const StringMap &getBootstrapSymbolsMap() const { + return BootstrapSymbols; + } + + /// For each (ExecutorAddr&, StringRef) pair, looks up the string in the + /// bootstrap symbols map and writes its address to the ExecutorAddr if + /// found. If any symbol is not found then the function returns an error. 
+ Error getBootstrapSymbols( + ArrayRef> Pairs) const { + for (auto &KV : Pairs) { + auto I = BootstrapSymbols.find(KV.second); + if (I == BootstrapSymbols.end()) + return make_error("Symbol \"" + KV.second + + "\" not found " + "in bootstrap symbols map", + inconvertibleErrorCode()); + + KV.first = I->second; + } + return Error::success(); + } + /// Load the dynamic library at the given path and return a handle to it. /// If LibraryPath is null this function will return the global handle for /// the target process. @@ -163,44 +251,119 @@ public: /// The result of the lookup is a 2-dimentional array of target addresses /// that correspond to the lookup order. If a required symbol is not /// found then this method will return an error. If a weakly referenced - /// symbol is not found then it be assigned a '0' value in the result. - /// that correspond to the lookup order. + /// symbol is not found then it be assigned a '0' value. virtual Expected> lookupSymbols(ArrayRef Request) = 0; /// Run function with a main-like signature. - virtual Expected runAsMain(JITTargetAddress MainFnAddr, + virtual Expected runAsMain(ExecutorAddr MainFnAddr, ArrayRef Args) = 0; - /// Run a wrapper function in the executor. + /// Run a wrapper function in the executor. The given WFRHandler will be + /// called on the result when it is returned. /// /// The wrapper function should be callable as: /// /// \code{.cpp} /// CWrapperFunctionResult fn(uint8_t *Data, uint64_t Size); /// \endcode{.cpp} - /// - /// The given OnComplete function will be called to return the result. - virtual void callWrapperAsync(SendResultFunction OnComplete, - JITTargetAddress WrapperFnAddr, + virtual void callWrapperAsync(ExecutorAddr WrapperFnAddr, + IncomingWFRHandler OnComplete, ArrayRef ArgBuffer) = 0; + /// Run a wrapper function in the executor using the given Runner to dispatch + /// OnComplete when the result is ready. + template + void callWrapperAsync(RunPolicyT &&Runner, ExecutorAddr WrapperFnAddr, + FnT &&OnComplete, ArrayRef ArgBuffer) { + callWrapperAsync( + WrapperFnAddr, Runner(std::forward(OnComplete)), ArgBuffer); + } + + /// Run a wrapper function in the executor. OnComplete will be dispatched + /// as a GenericNamedTask using this instance's TaskDispatch object. + template + void callWrapperAsync(ExecutorAddr WrapperFnAddr, FnT &&OnComplete, + ArrayRef ArgBuffer) { + callWrapperAsync(RunAsTask(*D), WrapperFnAddr, + std::forward(OnComplete), ArgBuffer); + } + + /// Run a wrapper function in the executor. The wrapper function should be + /// callable as: + /// + /// \code{.cpp} + /// CWrapperFunctionResult fn(uint8_t *Data, uint64_t Size); + /// \endcode{.cpp} + shared::WrapperFunctionResult callWrapper(ExecutorAddr WrapperFnAddr, + ArrayRef ArgBuffer) { + std::promise RP; + auto RF = RP.get_future(); + callWrapperAsync( + RunInPlace(), WrapperFnAddr, + [&](shared::WrapperFunctionResult R) { + RP.set_value(std::move(R)); + }, ArgBuffer); + return RF.get(); + } + + /// Run a wrapper function using SPS to serialize the arguments and + /// deserialize the results. 
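A sketch of an SPS call using the overloads declared below. The wrapper address and its SPS signature (void(SPSString)) are assumptions for illustration, not something defined by this patch:

\code{.cpp}
  using namespace shared;
  // RunInPlace runs the completion handler directly on the thread that
  // receives the result; omitting the policy would dispatch it as a Task.
  EPC.callSPSWrapperAsync<void(SPSString)>(
      ExecutorProcessControl::RunInPlace(), WrapperFnAddr,
      [](Error SerializationErr) {
        if (SerializationErr)
          logAllUnhandledErrors(std::move(SerializationErr), errs(),
                                "SPS call: ");
      },
      StringRef("hello"));
\endcode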
+ template + void callSPSWrapperAsync(RunPolicyT &&Runner, ExecutorAddr WrapperFnAddr, + SendResultT &&SendResult, const ArgTs &...Args) { + shared::WrapperFunction::callAsync( + [this, WrapperFnAddr, Runner = std::move(Runner)] + (auto &&SendResult, const char *ArgData, size_t ArgSize) mutable { + this->callWrapperAsync(std::move(Runner), WrapperFnAddr, + std::move(SendResult), + ArrayRef(ArgData, ArgSize)); + }, + std::forward(SendResult), Args...); + } + + /// Run a wrapper function using SPS to serialize the arguments and + /// deserialize the results. + template + void callSPSWrapperAsync(ExecutorAddr WrapperFnAddr, SendResultT &&SendResult, + const ArgTs &...Args) { + callSPSWrapperAsync(RunAsTask(*D), WrapperFnAddr, + std::forward(SendResult), + Args...); + } + + /// Run a wrapper function using SPS to serialize the arguments and + /// deserialize the results. + /// + /// If SPSSignature is a non-void function signature then the second argument + /// (the first in the Args list) should be a reference to a return value. + template + Error callSPSWrapper(ExecutorAddr WrapperFnAddr, + WrapperCallArgTs &&...WrapperCallArgs) { + return shared::WrapperFunction::call( + [this, WrapperFnAddr](const char *ArgData, size_t ArgSize) { + return callWrapper(WrapperFnAddr, ArrayRef(ArgData, ArgSize)); + }, + std::forward(WrapperCallArgs)...); + } + /// Disconnect from the target process. /// /// This should be called after the JIT session is shut down. virtual Error disconnect() = 0; protected: - ExecutorProcessControl(std::shared_ptr SSP) - : SSP(std::move(SSP)) {} std::shared_ptr SSP; + std::unique_ptr D; ExecutionSession *ES = nullptr; Triple TargetTriple; unsigned PageSize = 0; JITDispatchInfo JDI; MemoryAccess *MemAccess = nullptr; jitlink::JITLinkMemoryManager *MemMgr = nullptr; + StringMap BootstrapSymbols; }; /// A ExecutorProcessControl instance that asserts if any of its methods are @@ -210,9 +373,12 @@ class UnsupportedExecutorProcessControl : public ExecutorProcessControl { public: UnsupportedExecutorProcessControl( std::shared_ptr SSP = nullptr, + std::unique_ptr D = nullptr, const std::string &TT = "", unsigned PageSize = 0) : ExecutorProcessControl(SSP ? std::move(SSP) - : std::make_shared()) { + : std::make_shared(), + D ? std::move(D) + : std::make_unique()) { this->TargetTriple = Triple(TT); this->PageSize = PageSize; } @@ -226,13 +392,13 @@ public: llvm_unreachable("Unsupported"); } - Expected runAsMain(JITTargetAddress MainFnAddr, + Expected runAsMain(ExecutorAddr MainFnAddr, ArrayRef Args) override { llvm_unreachable("Unsupported"); } - void callWrapperAsync(SendResultFunction OnComplete, - JITTargetAddress WrapperFnAddr, + void callWrapperAsync(ExecutorAddr WrapperFnAddr, + IncomingWFRHandler OnComplete, ArrayRef ArgBuffer) override { llvm_unreachable("Unsupported"); } @@ -246,8 +412,9 @@ class SelfExecutorProcessControl private ExecutorProcessControl::MemoryAccess { public: SelfExecutorProcessControl( - std::shared_ptr SSP, Triple TargetTriple, - unsigned PageSize, std::unique_ptr MemMgr); + std::shared_ptr SSP, std::unique_ptr D, + Triple TargetTriple, unsigned PageSize, + std::unique_ptr MemMgr); /// Create a SelfExecutorProcessControl with the given symbol string pool and /// memory manager. @@ -256,6 +423,7 @@ public: /// be created and used by default. 
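For an in-process JIT this is typically all the setup needed. A sketch, assuming ExecutionSession's constructor that takes ownership of an ExecutorProcessControl:

\code{.cpp}
  auto EPC = SelfExecutorProcessControl::Create();
  if (!EPC)
    return EPC.takeError();
  ExecutionSession ES(std::move(*EPC));
\endcode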
static Expected> Create(std::shared_ptr SSP = nullptr, + std::unique_ptr D = nullptr, std::unique_ptr MemMgr = nullptr); Expected loadDylib(const char *DylibPath) override; @@ -263,32 +431,32 @@ public: Expected> lookupSymbols(ArrayRef Request) override; - Expected runAsMain(JITTargetAddress MainFnAddr, + Expected runAsMain(ExecutorAddr MainFnAddr, ArrayRef Args) override; - void callWrapperAsync(SendResultFunction OnComplete, - JITTargetAddress WrapperFnAddr, + void callWrapperAsync(ExecutorAddr WrapperFnAddr, + IncomingWFRHandler OnComplete, ArrayRef ArgBuffer) override; Error disconnect() override; private: - void writeUInt8s(ArrayRef Ws, - WriteResultFn OnWriteComplete) override; + void writeUInt8sAsync(ArrayRef Ws, + WriteResultFn OnWriteComplete) override; - void writeUInt16s(ArrayRef Ws, - WriteResultFn OnWriteComplete) override; + void writeUInt16sAsync(ArrayRef Ws, + WriteResultFn OnWriteComplete) override; - void writeUInt32s(ArrayRef Ws, - WriteResultFn OnWriteComplete) override; + void writeUInt32sAsync(ArrayRef Ws, + WriteResultFn OnWriteComplete) override; - void writeUInt64s(ArrayRef Ws, - WriteResultFn OnWriteComplete) override; + void writeUInt64sAsync(ArrayRef Ws, + WriteResultFn OnWriteComplete) override; - void writeBuffers(ArrayRef Ws, - WriteResultFn OnWriteComplete) override; + void writeBuffersAsync(ArrayRef Ws, + WriteResultFn OnWriteComplete) override; - static shared::detail::CWrapperFunctionResult + static shared::CWrapperFunctionResult jitDispatchViaWrapperFunctionManager(void *Ctx, const void *FnTag, const char *Data, size_t Size); diff --git a/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h b/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h index 78e3ceef50e2..4d6d46595fc3 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h @@ -45,6 +45,13 @@ class PointerType; class Triple; class Twine; class Value; +class MCDisassembler; +class MCInstrAnalysis; + +namespace jitlink { +class LinkGraph; +class Symbol; +} // namespace jitlink namespace orc { @@ -557,6 +564,33 @@ GlobalAlias *cloneGlobalAliasDecl(Module &Dst, const GlobalAlias &OrigA, void cloneModuleFlagsMetadata(Module &Dst, const Module &Src, ValueToValueMapTy &VMap); +/// Introduce relocations to \p Sym in its own definition if there are any +/// pointers formed via PC-relative address that do not already have a +/// relocation. +/// +/// This is useful when introducing indirection via a stub function at link time +/// without compiler support. If a function pointer is formed without a +/// relocation, e.g. in the definition of \c foo +/// +/// \code +/// _foo: +/// leaq -7(%rip), rax # form pointer to _foo without relocation +/// _bar: +/// leaq (%rip), %rax # uses X86_64_RELOC_SIGNED to '_foo' +/// \endcode +/// +/// the pointer to \c _foo computed by \c _foo and \c _bar may differ if we +/// introduce a stub for _foo. If the pointer is used as a key, this may be +/// observable to the program. This pass will attempt to introduce the missing +/// "self-relocation" on the leaq instruction. +/// +/// This is based on disassembly and should be considered "best effort". It may +/// silently fail to add relocations. 
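A sketch of how this could be driven from a JITLink pass, assuming Disassembler and MIA were created up front for the graph's triple (e.g. via the usual MC factory functions):

\code{.cpp}
  // Add missing self-referencing relocations for every callable symbol.
  for (auto *Sym : G.defined_symbols())
    if (Sym->isCallable())
      if (auto Err = addFunctionPointerRelocationsToCurrentSymbol(
              *Sym, G, *Disassembler, *MIA))
        return Err;
\endcode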
+Error addFunctionPointerRelocationsToCurrentSymbol(jitlink::Symbol &Sym, + jitlink::LinkGraph &G, + MCDisassembler &Disassembler, + MCInstrAnalysis &MIA); + } // end namespace orc } // end namespace llvm diff --git a/llvm/include/llvm/ExecutionEngine/Orc/LLVMSPSSerializers.h b/llvm/include/llvm/ExecutionEngine/Orc/LLVMSPSSerializers.h deleted file mode 100644 index f3d616deae8f..000000000000 --- a/llvm/include/llvm/ExecutionEngine/Orc/LLVMSPSSerializers.h +++ /dev/null @@ -1,69 +0,0 @@ -//===-- LLVMSPSSerializers.h - SPS serialization for LLVM types -*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// SPS Serialization for common LLVM types. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_EXECUTIONENGINE_ORC_LLVMSPSSERIALIZERS_H -#define LLVM_EXECUTIONENGINE_ORC_LLVMSPSSERIALIZERS_H - -#include "llvm/ADT/StringMap.h" -#include "llvm/ExecutionEngine/Orc/Shared/SimplePackedSerialization.h" - -namespace llvm { -namespace orc { -namespace shared { - -template -class SPSSerializationTraits>, - StringMap> { -public: - static size_t size(const StringMap &M) { - size_t Sz = SPSArgList::size(static_cast(M.size())); - for (auto &E : M) - Sz += SPSArgList::size(E.first(), E.second); - return Sz; - } - - static bool serialize(SPSOutputBuffer &OB, const StringMap &M) { - if (!SPSArgList::serialize(OB, static_cast(M.size()))) - return false; - - for (auto &E : M) - if (!SPSArgList::serialize(OB, E.first(), E.second)) - return false; - - return true; - } - - static bool deserialize(SPSInputBuffer &IB, StringMap &M) { - uint64_t Size; - assert(M.empty() && "M already contains elements"); - - if (!SPSArgList::deserialize(IB, Size)) - return false; - - while (Size--) { - StringRef S; - ValueT V; - if (!SPSArgList::deserialize(IB, S, V)) - return false; - if (!M.insert(std::make_pair(S, V)).second) - return false; - } - - return true; - } -}; - -} // end namespace shared -} // end namespace orc -} // end namespace llvm - -#endif // LLVM_EXECUTIONENGINE_ORC_LLVMSPSSERIALIZERS_H diff --git a/llvm/include/llvm/ExecutionEngine/Orc/LookupAndRecordAddrs.h b/llvm/include/llvm/ExecutionEngine/Orc/LookupAndRecordAddrs.h new file mode 100644 index 000000000000..a598405ee4f6 --- /dev/null +++ b/llvm/include/llvm/ExecutionEngine/Orc/LookupAndRecordAddrs.h @@ -0,0 +1,70 @@ +//===-- LookupAndRecordAddrs.h - Symbol lookup support utility --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Record the addresses of a set of symbols into ExecutorAddr objects. +// +// This can be used to avoid repeated lookup (via ExecutionSession::lookup) of +// the given symbols. 
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_LOOKUPANDRECORDADDRS_H
+#define LLVM_EXECUTIONENGINE_ORC_LOOKUPANDRECORDADDRS_H
+
+#include "llvm/ADT/FunctionExtras.h"
+#include "llvm/ExecutionEngine/Orc/Core.h"
+#include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h"
+
+#include
+
+namespace llvm {
+namespace orc {
+
+/// Record addresses of the given symbols in the given ExecutorAddrs.
+///
+/// Useful for making permanent records of symbol addresses to call or
+/// access in the executor (e.g. runtime support functions in Platform
+/// subclasses).
+///
+/// By default the symbols are looked up using
+/// SymbolLookupFlags::RequiredSymbol, and an error will be generated if any of
+/// the requested symbols are not defined.
+///
+/// If SymbolLookupFlags::WeaklyReferencedSymbol is used then any missing
+/// symbols will have their corresponding address objects set to zero, and
+/// this function will never generate an error (the caller will need to check
+/// addresses before using them).
+///
+/// Asynchronous version.
+void lookupAndRecordAddrs(
+    unique_function<void(Error)> OnRecorded, ExecutionSession &ES, LookupKind K,
+    const JITDylibSearchOrder &SearchOrder,
+    std::vector<std::pair<SymbolStringPtr, ExecutorAddr *>> Pairs,
+    SymbolLookupFlags LookupFlags = SymbolLookupFlags::RequiredSymbol);
+
+/// Record addresses of the given symbols in the given ExecutorAddrs.
+///
+/// Blocking version.
+Error lookupAndRecordAddrs(
+    ExecutionSession &ES, LookupKind K, const JITDylibSearchOrder &SearchOrder,
+    std::vector<std::pair<SymbolStringPtr, ExecutorAddr *>> Pairs,
+    SymbolLookupFlags LookupFlags = SymbolLookupFlags::RequiredSymbol);
+
+/// Record addresses of given symbols in the given ExecutorAddrs.
+///
+/// ExecutorProcessControl lookup version. Lookups are always implicitly
+/// weak.
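A sketch of the ExecutorProcessControl form declared below. H is assumed to be a DylibHandle from a prior loadDylib call, and the symbol name is a placeholder:

\code{.cpp}
  ExecutorAddr HelperFn;
  if (auto Err = lookupAndRecordAddrs(
          EPC, H,
          {{EPC.getSymbolStringPool()->intern("_example_helper"), &HelperFn}}))
    return Err;
  // HelperFn now holds the symbol's address, or zero if it was not found
  // (lookups through this overload are implicitly weak).
\endcode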
+Error lookupAndRecordAddrs( + ExecutorProcessControl &EPC, tpctypes::DylibHandle H, + std::vector> Pairs, + SymbolLookupFlags LookupFlags = SymbolLookupFlags::RequiredSymbol); + +} // End namespace orc +} // End namespace llvm + +#endif // LLVM_EXECUTIONENGINE_ORC_LOOKUPANDRECORDADDRS_H diff --git a/llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h b/llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h index f77dfd208413..d7b5e2eda6ee 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h @@ -16,7 +16,6 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ExecutionEngine/Orc/Core.h" #include "llvm/ExecutionEngine/Orc/ExecutorProcessControl.h" -#include "llvm/ExecutionEngine/Orc/LLVMSPSSerializers.h" #include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h" #include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h" @@ -27,22 +26,16 @@ namespace llvm { namespace orc { -struct MachOPerObjectSectionsToRegister { - ExecutorAddressRange EHFrameSection; - ExecutorAddressRange ThreadDataSection; -}; - struct MachOJITDylibInitializers { - using SectionList = std::vector; + using SectionList = std::vector; - MachOJITDylibInitializers(std::string Name, - ExecutorAddress MachOHeaderAddress) + MachOJITDylibInitializers(std::string Name, ExecutorAddr MachOHeaderAddress) : Name(std::move(Name)), MachOHeaderAddress(std::move(MachOHeaderAddress)) {} std::string Name; - ExecutorAddress MachOHeaderAddress; - ExecutorAddress ObjCImageInfoAddress; + ExecutorAddr MachOHeaderAddress; + ExecutorAddr ObjCImageInfoAddress; StringMap InitSections; }; @@ -155,15 +148,12 @@ private: using InitSymbolDepMap = DenseMap; - void addInitializerSupportPasses(MaterializationResponsibility &MR, - jitlink::PassConfiguration &Config); - - void addMachOHeaderSupportPasses(MaterializationResponsibility &MR, - jitlink::PassConfiguration &Config); - void addEHAndTLVSupportPasses(MaterializationResponsibility &MR, jitlink::PassConfiguration &Config); + Error associateJITDylibHeaderSymbol(jitlink::LinkGraph &G, + MaterializationResponsibility &MR); + Error preserveInitSections(jitlink::LinkGraph &G, MaterializationResponsibility &MR); @@ -174,6 +164,10 @@ private: Error fixTLVSectionsAndEdges(jitlink::LinkGraph &G, JITDylib &JD); + Error registerEHAndTLVSections(jitlink::LinkGraph &G); + + Error registerEHSectionsPhase1(jitlink::LinkGraph &G); + std::mutex PluginMutex; MachOPlatform &MP; DenseMap> ObjCImageInfos; @@ -186,7 +180,7 @@ private: using SendDeinitializerSequenceFn = unique_function)>; - using SendSymbolAddressFn = unique_function)>; + using SendSymbolAddressFn = unique_function)>; static bool supportedTarget(const Triple &TT); @@ -209,31 +203,34 @@ private: StringRef JDName); void rt_getDeinitializers(SendDeinitializerSequenceFn SendResult, - ExecutorAddress Handle); + ExecutorAddr Handle); - void rt_lookupSymbol(SendSymbolAddressFn SendResult, ExecutorAddress Handle, + void rt_lookupSymbol(SendSymbolAddressFn SendResult, ExecutorAddr Handle, StringRef SymbolName); // Records the addresses of runtime symbols used by the platform. 
Error bootstrapMachORuntime(JITDylib &PlatformJD); - Error registerInitInfo(JITDylib &JD, ExecutorAddress ObjCImageInfoAddr, + Error registerInitInfo(JITDylib &JD, ExecutorAddr ObjCImageInfoAddr, ArrayRef InitSections); - Error registerPerObjectSections(const MachOPerObjectSectionsToRegister &POSR); - Expected createPThreadKey(); + enum PlatformState { BootstrapPhase1, BootstrapPhase2, Initialized }; + ExecutionSession &ES; ObjectLinkingLayer &ObjLinkingLayer; SymbolStringPtr MachOHeaderStartSymbol; - std::atomic RuntimeBootstrapped{false}; + std::atomic State{BootstrapPhase1}; - ExecutorAddress orc_rt_macho_platform_bootstrap; - ExecutorAddress orc_rt_macho_platform_shutdown; - ExecutorAddress orc_rt_macho_register_object_sections; - ExecutorAddress orc_rt_macho_create_pthread_key; + ExecutorAddr orc_rt_macho_platform_bootstrap; + ExecutorAddr orc_rt_macho_platform_shutdown; + ExecutorAddr orc_rt_macho_register_ehframe_section; + ExecutorAddr orc_rt_macho_deregister_ehframe_section; + ExecutorAddr orc_rt_macho_register_thread_data_section; + ExecutorAddr orc_rt_macho_deregister_thread_data_section; + ExecutorAddr orc_rt_macho_create_pthread_key; DenseMap RegisteredInitSymbols; @@ -241,7 +238,6 @@ private: // aggregating data from the jitlink. std::mutex PlatformMutex; DenseMap InitSeqs; - std::vector BootstrapPOSRs; DenseMap HeaderAddrToJITDylib; DenseMap JITDylibToPThreadKey; @@ -249,38 +245,12 @@ private: namespace shared { -using SPSMachOPerObjectSectionsToRegister = - SPSTuple; - -template <> -class SPSSerializationTraits { - -public: - static size_t size(const MachOPerObjectSectionsToRegister &MOPOSR) { - return SPSMachOPerObjectSectionsToRegister::AsArgList::size( - MOPOSR.EHFrameSection, MOPOSR.ThreadDataSection); - } - - static bool serialize(SPSOutputBuffer &OB, - const MachOPerObjectSectionsToRegister &MOPOSR) { - return SPSMachOPerObjectSectionsToRegister::AsArgList::serialize( - OB, MOPOSR.EHFrameSection, MOPOSR.ThreadDataSection); - } - - static bool deserialize(SPSInputBuffer &IB, - MachOPerObjectSectionsToRegister &MOPOSR) { - return SPSMachOPerObjectSectionsToRegister::AsArgList::deserialize( - IB, MOPOSR.EHFrameSection, MOPOSR.ThreadDataSection); - } -}; - -using SPSNamedExecutorAddressRangeSequenceMap = - SPSSequence>; +using SPSNamedExecutorAddrRangeSequenceMap = + SPSSequence>; using SPSMachOJITDylibInitializers = - SPSTuple; + SPSTuple; using SPSMachOJITDylibInitializerSequence = SPSSequence; diff --git a/llvm/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h b/llvm/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h index 5632118eee4e..109922a46e26 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h @@ -184,13 +184,13 @@ public: } private: - using AllocPtr = std::unique_ptr; + using FinalizedAlloc = jitlink::JITLinkMemoryManager::FinalizedAlloc; void modifyPassConfig(MaterializationResponsibility &MR, jitlink::LinkGraph &G, jitlink::PassConfiguration &PassConfig); void notifyLoaded(MaterializationResponsibility &MR); - Error notifyEmitted(MaterializationResponsibility &MR, AllocPtr Alloc); + Error notifyEmitted(MaterializationResponsibility &MR, FinalizedAlloc FA); Error handleRemoveResources(ResourceKey K) override; void handleTransferResources(ResourceKey DstKey, ResourceKey SrcKey) override; @@ -201,7 +201,7 @@ private: bool OverrideObjectFlags = false; bool AutoClaimObjectSymbols = false; ReturnObjectBufferFunction ReturnObjectBuffer; - DenseMap> Allocs; + DenseMap> Allocs; 
std::vector> Plugins; }; diff --git a/llvm/include/llvm/ExecutionEngine/Orc/OrcRPCExecutorProcessControl.h b/llvm/include/llvm/ExecutionEngine/Orc/OrcRPCExecutorProcessControl.h deleted file mode 100644 index 4310ba9ce9e0..000000000000 --- a/llvm/include/llvm/ExecutionEngine/Orc/OrcRPCExecutorProcessControl.h +++ /dev/null @@ -1,436 +0,0 @@ -//===-- OrcRPCExecutorProcessControl.h - Remote target control --*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// Executor control via ORC RPC. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_EXECUTIONENGINE_ORC_ORCRPCEXECUTORPROCESSCONTROL_H -#define LLVM_EXECUTIONENGINE_ORC_ORCRPCEXECUTORPROCESSCONTROL_H - -#include "llvm/ExecutionEngine/Orc/ExecutorProcessControl.h" -#include "llvm/ExecutionEngine/Orc/Shared/RPCUtils.h" -#include "llvm/ExecutionEngine/Orc/Shared/RawByteChannel.h" -#include "llvm/ExecutionEngine/Orc/TargetProcess/OrcRPCTPCServer.h" -#include "llvm/Support/MSVCErrorWorkarounds.h" - -namespace llvm { -namespace orc { - -/// JITLinkMemoryManager implementation for a process connected via an ORC RPC -/// endpoint. -template -class OrcRPCEPCJITLinkMemoryManager : public jitlink::JITLinkMemoryManager { -private: - struct HostAlloc { - std::unique_ptr Mem; - uint64_t Size; - }; - - struct TargetAlloc { - JITTargetAddress Address = 0; - uint64_t AllocatedSize = 0; - }; - - using HostAllocMap = DenseMap; - using TargetAllocMap = DenseMap; - -public: - class OrcRPCAllocation : public Allocation { - public: - OrcRPCAllocation(OrcRPCEPCJITLinkMemoryManager &Parent, - HostAllocMap HostAllocs, TargetAllocMap TargetAllocs) - : Parent(Parent), HostAllocs(std::move(HostAllocs)), - TargetAllocs(std::move(TargetAllocs)) { - assert(HostAllocs.size() == TargetAllocs.size() && - "HostAllocs size should match TargetAllocs"); - } - - ~OrcRPCAllocation() override { - assert(TargetAllocs.empty() && "failed to deallocate"); - } - - MutableArrayRef getWorkingMemory(ProtectionFlags Seg) override { - auto I = HostAllocs.find(Seg); - assert(I != HostAllocs.end() && "No host allocation for segment"); - auto &HA = I->second; - return {HA.Mem.get(), static_cast(HA.Size)}; - } - - JITTargetAddress getTargetMemory(ProtectionFlags Seg) override { - auto I = TargetAllocs.find(Seg); - assert(I != TargetAllocs.end() && "No target allocation for segment"); - return I->second.Address; - } - - void finalizeAsync(FinalizeContinuation OnFinalize) override { - - std::vector BufferWrites; - orcrpctpc::ReleaseOrFinalizeMemRequest FMR; - - for (auto &KV : HostAllocs) { - assert(TargetAllocs.count(KV.first) && - "No target allocation for buffer"); - auto &HA = KV.second; - auto &TA = TargetAllocs[KV.first]; - BufferWrites.push_back({TA.Address, StringRef(HA.Mem.get(), HA.Size)}); - FMR.push_back({orcrpctpc::toWireProtectionFlags( - static_cast(KV.first)), - TA.Address, TA.AllocatedSize}); - } - - DEBUG_WITH_TYPE("orc", { - dbgs() << "finalizeAsync " << (void *)this << ":\n"; - auto FMRI = FMR.begin(); - for (auto &B : BufferWrites) { - auto Prot = FMRI->Prot; - ++FMRI; - dbgs() << " Writing " << formatv("{0:x16}", B.Buffer.size()) - << " bytes to " << ((Prot & orcrpctpc::WPF_Read) ? 'R' : '-') - << ((Prot & orcrpctpc::WPF_Write) ? 
'W' : '-') - << ((Prot & orcrpctpc::WPF_Exec) ? 'X' : '-') - << " segment: local " << (const void *)B.Buffer.data() - << " -> target " << formatv("{0:x16}", B.Address) << "\n"; - } - }); - if (auto Err = - Parent.Parent.getMemoryAccess().writeBuffers(BufferWrites)) { - OnFinalize(std::move(Err)); - return; - } - - DEBUG_WITH_TYPE("orc", dbgs() << " Applying permissions...\n"); - if (auto Err = - Parent.getEndpoint().template callAsync( - [OF = std::move(OnFinalize)](Error Err2) { - // FIXME: Dispatch to work queue. - std::thread([OF = std::move(OF), - Err3 = std::move(Err2)]() mutable { - DEBUG_WITH_TYPE( - "orc", { dbgs() << " finalizeAsync complete\n"; }); - OF(std::move(Err3)); - }).detach(); - return Error::success(); - }, - FMR)) { - DEBUG_WITH_TYPE("orc", dbgs() << " failed.\n"); - Parent.getEndpoint().abandonPendingResponses(); - Parent.reportError(std::move(Err)); - } - DEBUG_WITH_TYPE("orc", { - dbgs() << "Leaving finalizeAsync (finalization may continue in " - "background)\n"; - }); - } - - Error deallocate() override { - orcrpctpc::ReleaseOrFinalizeMemRequest RMR; - for (auto &KV : TargetAllocs) - RMR.push_back({orcrpctpc::toWireProtectionFlags( - static_cast(KV.first)), - KV.second.Address, KV.second.AllocatedSize}); - TargetAllocs.clear(); - - return Parent.getEndpoint().template callB(RMR); - } - - private: - OrcRPCEPCJITLinkMemoryManager &Parent; - HostAllocMap HostAllocs; - TargetAllocMap TargetAllocs; - }; - - OrcRPCEPCJITLinkMemoryManager(OrcRPCEPCImplT &Parent) : Parent(Parent) {} - - Expected> - allocate(const jitlink::JITLinkDylib *JD, - const SegmentsRequestMap &Request) override { - orcrpctpc::ReserveMemRequest RMR; - HostAllocMap HostAllocs; - - for (auto &KV : Request) { - assert(KV.second.getContentSize() <= std::numeric_limits::max() && - "Content size is out-of-range for host"); - - RMR.push_back({orcrpctpc::toWireProtectionFlags( - static_cast(KV.first)), - KV.second.getContentSize() + KV.second.getZeroFillSize(), - KV.second.getAlignment()}); - HostAllocs[KV.first] = { - std::make_unique(KV.second.getContentSize()), - KV.second.getContentSize()}; - } - - DEBUG_WITH_TYPE("orc", { - dbgs() << "Orc remote memmgr got request:\n"; - for (auto &KV : Request) - dbgs() << " permissions: " - << ((KV.first & sys::Memory::MF_READ) ? 'R' : '-') - << ((KV.first & sys::Memory::MF_WRITE) ? 'W' : '-') - << ((KV.first & sys::Memory::MF_EXEC) ? 'X' : '-') - << ", content size: " - << formatv("{0:x16}", KV.second.getContentSize()) - << " + zero-fill-size: " - << formatv("{0:x16}", KV.second.getZeroFillSize()) - << ", align: " << KV.second.getAlignment() << "\n"; - }); - - // FIXME: LLVM RPC needs to be fixed to support alt - // serialization/deserialization on return types. For now just - // translate from std::map to DenseMap manually. - auto TmpTargetAllocs = - Parent.getEndpoint().template callB(RMR); - if (!TmpTargetAllocs) - return TmpTargetAllocs.takeError(); - - if (TmpTargetAllocs->size() != RMR.size()) - return make_error( - "Number of target allocations does not match request", - inconvertibleErrorCode()); - - TargetAllocMap TargetAllocs; - for (auto &E : *TmpTargetAllocs) - TargetAllocs[orcrpctpc::fromWireProtectionFlags(E.Prot)] = { - E.Address, E.AllocatedSize}; - - DEBUG_WITH_TYPE("orc", { - auto HAI = HostAllocs.begin(); - for (auto &KV : TargetAllocs) - dbgs() << " permissions: " - << ((KV.first & sys::Memory::MF_READ) ? 'R' : '-') - << ((KV.first & sys::Memory::MF_WRITE) ? 'W' : '-') - << ((KV.first & sys::Memory::MF_EXEC) ? 
'X' : '-') - << " assigned local " << (void *)HAI->second.Mem.get() - << ", target " << formatv("{0:x16}", KV.second.Address) << "\n"; - }); - - return std::make_unique(*this, std::move(HostAllocs), - std::move(TargetAllocs)); - } - -private: - void reportError(Error Err) { Parent.reportError(std::move(Err)); } - - decltype(std::declval().getEndpoint()) getEndpoint() { - return Parent.getEndpoint(); - } - - OrcRPCEPCImplT &Parent; -}; - -/// ExecutorProcessControl::MemoryAccess implementation for a process connected -/// via an ORC RPC endpoint. -template -class OrcRPCEPCMemoryAccess : public ExecutorProcessControl::MemoryAccess { -public: - OrcRPCEPCMemoryAccess(OrcRPCEPCImplT &Parent) : Parent(Parent) {} - - void writeUInt8s(ArrayRef Ws, - WriteResultFn OnWriteComplete) override { - writeViaRPC(Ws, std::move(OnWriteComplete)); - } - - void writeUInt16s(ArrayRef Ws, - WriteResultFn OnWriteComplete) override { - writeViaRPC(Ws, std::move(OnWriteComplete)); - } - - void writeUInt32s(ArrayRef Ws, - WriteResultFn OnWriteComplete) override { - writeViaRPC(Ws, std::move(OnWriteComplete)); - } - - void writeUInt64s(ArrayRef Ws, - WriteResultFn OnWriteComplete) override { - writeViaRPC(Ws, std::move(OnWriteComplete)); - } - - void writeBuffers(ArrayRef Ws, - WriteResultFn OnWriteComplete) override { - writeViaRPC(Ws, std::move(OnWriteComplete)); - } - -private: - template - void writeViaRPC(ArrayRef Ws, WriteResultFn OnWriteComplete) { - if (auto Err = Parent.getEndpoint().template callAsync( - [OWC = std::move(OnWriteComplete)](Error Err2) mutable -> Error { - OWC(std::move(Err2)); - return Error::success(); - }, - Ws)) { - Parent.reportError(std::move(Err)); - Parent.getEndpoint().abandonPendingResponses(); - } - } - - OrcRPCEPCImplT &Parent; -}; - -// ExecutorProcessControl for a process connected via an ORC RPC Endpoint. -template -class OrcRPCExecutorProcessControlBase : public ExecutorProcessControl { -public: - using ErrorReporter = unique_function; - - using OnCloseConnectionFunction = unique_function; - - OrcRPCExecutorProcessControlBase(std::shared_ptr SSP, - RPCEndpointT &EP, ErrorReporter ReportError) - : ExecutorProcessControl(std::move(SSP)), - ReportError(std::move(ReportError)), EP(EP) { - using ThisT = OrcRPCExecutorProcessControlBase; - EP.template addAsyncHandler(*this, - &ThisT::runWrapperInJIT); - } - - void reportError(Error Err) { ReportError(std::move(Err)); } - - RPCEndpointT &getEndpoint() { return EP; } - - Expected loadDylib(const char *DylibPath) override { - DEBUG_WITH_TYPE("orc", { - dbgs() << "Loading dylib \"" << (DylibPath ? DylibPath : "") << "\" "; - if (!DylibPath) - dbgs() << "(process symbols)"; - dbgs() << "\n"; - }); - if (!DylibPath) - DylibPath = ""; - auto H = EP.template callB(DylibPath); - DEBUG_WITH_TYPE("orc", { - if (H) - dbgs() << " got handle " << formatv("{0:x16}", *H) << "\n"; - else - dbgs() << " error, unable to load\n"; - }); - return H; - } - - Expected> - lookupSymbols(ArrayRef Request) override { - std::vector RR; - for (auto &E : Request) { - RR.push_back({}); - RR.back().first = E.Handle; - for (auto &KV : E.Symbols) - RR.back().second.push_back( - {(*KV.first).str(), - KV.second == SymbolLookupFlags::WeaklyReferencedSymbol}); - } - DEBUG_WITH_TYPE("orc", { - dbgs() << "Compound lookup:\n"; - for (auto &R : Request) { - dbgs() << " In " << formatv("{0:x16}", R.Handle) << ": {"; - bool First = true; - for (auto &KV : R.Symbols) { - dbgs() << (First ? 
"" : ",") << " " << *KV.first; - First = false; - } - dbgs() << " }\n"; - } - }); - return EP.template callB(RR); - } - - Expected runAsMain(JITTargetAddress MainFnAddr, - ArrayRef Args) override { - DEBUG_WITH_TYPE("orc", { - dbgs() << "Running as main: " << formatv("{0:x16}", MainFnAddr) - << ", args = ["; - for (unsigned I = 0; I != Args.size(); ++I) - dbgs() << (I ? "," : "") << " \"" << Args[I] << "\""; - dbgs() << "]\n"; - }); - auto Result = EP.template callB(MainFnAddr, Args); - DEBUG_WITH_TYPE("orc", { - dbgs() << " call to " << formatv("{0:x16}", MainFnAddr); - if (Result) - dbgs() << " returned result " << *Result << "\n"; - else - dbgs() << " failed\n"; - }); - return Result; - } - - void callWrapperAsync(SendResultFunction OnComplete, - JITTargetAddress WrapperFnAddr, - ArrayRef ArgBuffer) override { - DEBUG_WITH_TYPE("orc", { - dbgs() << "Running as wrapper function " - << formatv("{0:x16}", WrapperFnAddr) << " with " - << formatv("{0:x16}", ArgBuffer.size()) << " argument buffer\n"; - }); - auto Result = EP.template callB( - WrapperFnAddr, - ArrayRef(reinterpret_cast(ArgBuffer.data()), - ArgBuffer.size())); - - if (!Result) - OnComplete(shared::WrapperFunctionResult::createOutOfBandError( - toString(Result.takeError()))); - OnComplete(std::move(*Result)); - } - - Error closeConnection(OnCloseConnectionFunction OnCloseConnection) { - DEBUG_WITH_TYPE("orc", dbgs() << "Closing connection to remote\n"); - return EP.template callAsync( - std::move(OnCloseConnection)); - } - - Error closeConnectionAndWait() { - std::promise P; - auto F = P.get_future(); - if (auto Err = closeConnection([&](Error Err2) -> Error { - P.set_value(std::move(Err2)); - return Error::success(); - })) { - EP.abandonAllPendingResponses(); - return joinErrors(std::move(Err), F.get()); - } - return F.get(); - } - -protected: - /// Subclasses must call this during construction to initialize the - /// TargetTriple and PageSize members. - Error initializeORCRPCEPCBase() { - if (auto EPI = EP.template callB()) { - this->TargetTriple = Triple(EPI->Triple); - this->PageSize = PageSize; - this->JDI = {ExecutorAddress(EPI->DispatchFuncAddr), - ExecutorAddress(EPI->DispatchCtxAddr)}; - return Error::success(); - } else - return EPI.takeError(); - } - -private: - Error runWrapperInJIT( - std::function)> SendResult, - JITTargetAddress FunctionTag, std::vector ArgBuffer) { - - getExecutionSession().runJITDispatchHandler( - [this, SendResult = std::move(SendResult)]( - Expected R) { - if (auto Err = SendResult(std::move(R))) - ReportError(std::move(Err)); - }, - FunctionTag, - {reinterpret_cast(ArgBuffer.data()), ArgBuffer.size()}); - return Error::success(); - } - - ErrorReporter ReportError; - RPCEndpointT &EP; -}; - -} // end namespace orc -} // end namespace llvm - -#endif // LLVM_EXECUTIONENGINE_ORC_ORCRPCEXECUTORPROCESSCONTROL_H diff --git a/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h b/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h deleted file mode 100644 index 3d139740d677..000000000000 --- a/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h +++ /dev/null @@ -1,925 +0,0 @@ -//===- OrcRemoteTargetClient.h - Orc Remote-target Client -------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines the OrcRemoteTargetClient class and helpers. This class -// can be used to communicate over an RawByteChannel with an -// OrcRemoteTargetServer instance to support remote-JITing. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_EXECUTIONENGINE_ORC_ORCREMOTETARGETCLIENT_H -#define LLVM_EXECUTIONENGINE_ORC_ORCREMOTETARGETCLIENT_H - -#include "llvm/ADT/Optional.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/StringMap.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/ExecutionEngine/JITSymbol.h" -#include "llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h" -#include "llvm/ExecutionEngine/Orc/IndirectionUtils.h" -#include "llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h" -#include "llvm/ExecutionEngine/RuntimeDyld.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/Error.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/Format.h" -#include "llvm/Support/MathExtras.h" -#include "llvm/Support/Memory.h" -#include "llvm/Support/raw_ostream.h" -#include -#include -#include -#include -#include -#include -#include -#include - -#define DEBUG_TYPE "orc-remote" - -namespace llvm { -namespace orc { -namespace remote { - -/// This class provides utilities (including memory manager, indirect stubs -/// manager, and compile callback manager types) that support remote JITing -/// in ORC. -/// -/// Each of the utility classes talks to a JIT server (an instance of the -/// OrcRemoteTargetServer class) via an RPC system (see RPCUtils.h) to carry out -/// its actions. -class OrcRemoteTargetClient - : public shared::SingleThreadedRPCEndpoint { -public: - /// Remote-mapped RuntimeDyld-compatible memory manager. - class RemoteRTDyldMemoryManager : public RuntimeDyld::MemoryManager { - friend class OrcRemoteTargetClient; - - public: - ~RemoteRTDyldMemoryManager() { - Client.destroyRemoteAllocator(Id); - LLVM_DEBUG(dbgs() << "Destroyed remote allocator " << Id << "\n"); - } - - RemoteRTDyldMemoryManager(const RemoteRTDyldMemoryManager &) = delete; - RemoteRTDyldMemoryManager & - operator=(const RemoteRTDyldMemoryManager &) = delete; - RemoteRTDyldMemoryManager(RemoteRTDyldMemoryManager &&) = default; - RemoteRTDyldMemoryManager &operator=(RemoteRTDyldMemoryManager &&) = delete; - - uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment, - unsigned SectionID, - StringRef SectionName) override { - Unmapped.back().CodeAllocs.emplace_back(Size, Alignment); - uint8_t *Alloc = reinterpret_cast( - Unmapped.back().CodeAllocs.back().getLocalAddress()); - LLVM_DEBUG(dbgs() << "Allocator " << Id << " allocated code for " - << SectionName << ": " << Alloc << " (" << Size - << " bytes, alignment " << Alignment << ")\n"); - return Alloc; - } - - uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment, - unsigned SectionID, StringRef SectionName, - bool IsReadOnly) override { - if (IsReadOnly) { - Unmapped.back().RODataAllocs.emplace_back(Size, Alignment); - uint8_t *Alloc = reinterpret_cast( - Unmapped.back().RODataAllocs.back().getLocalAddress()); - LLVM_DEBUG(dbgs() << "Allocator " << Id << " allocated ro-data for " - << SectionName << ": " << Alloc << " (" << Size - << " bytes, alignment " << Alignment << ")\n"); - return Alloc; - } // else... 
- - Unmapped.back().RWDataAllocs.emplace_back(Size, Alignment); - uint8_t *Alloc = reinterpret_cast( - Unmapped.back().RWDataAllocs.back().getLocalAddress()); - LLVM_DEBUG(dbgs() << "Allocator " << Id << " allocated rw-data for " - << SectionName << ": " << Alloc << " (" << Size - << " bytes, alignment " << Alignment << ")\n"); - return Alloc; - } - - void reserveAllocationSpace(uintptr_t CodeSize, uint32_t CodeAlign, - uintptr_t RODataSize, uint32_t RODataAlign, - uintptr_t RWDataSize, - uint32_t RWDataAlign) override { - Unmapped.push_back(ObjectAllocs()); - - LLVM_DEBUG(dbgs() << "Allocator " << Id << " reserved:\n"); - - if (CodeSize != 0) { - Unmapped.back().RemoteCodeAddr = - Client.reserveMem(Id, CodeSize, CodeAlign); - - LLVM_DEBUG( - dbgs() << " code: " - << format("0x%016" PRIx64, Unmapped.back().RemoteCodeAddr) - << " (" << CodeSize << " bytes, alignment " << CodeAlign - << ")\n"); - } - - if (RODataSize != 0) { - Unmapped.back().RemoteRODataAddr = - Client.reserveMem(Id, RODataSize, RODataAlign); - - LLVM_DEBUG( - dbgs() << " ro-data: " - << format("0x%016" PRIx64, Unmapped.back().RemoteRODataAddr) - << " (" << RODataSize << " bytes, alignment " << RODataAlign - << ")\n"); - } - - if (RWDataSize != 0) { - Unmapped.back().RemoteRWDataAddr = - Client.reserveMem(Id, RWDataSize, RWDataAlign); - - LLVM_DEBUG( - dbgs() << " rw-data: " - << format("0x%016" PRIx64, Unmapped.back().RemoteRWDataAddr) - << " (" << RWDataSize << " bytes, alignment " << RWDataAlign - << ")\n"); - } - } - - bool needsToReserveAllocationSpace() override { return true; } - - void registerEHFrames(uint8_t *Addr, uint64_t LoadAddr, - size_t Size) override { - UnfinalizedEHFrames.push_back({LoadAddr, Size}); - } - - void deregisterEHFrames() override { - for (auto &Frame : RegisteredEHFrames) { - // FIXME: Add error poll. - Client.deregisterEHFrames(Frame.Addr, Frame.Size); - } - } - - void notifyObjectLoaded(RuntimeDyld &Dyld, - const object::ObjectFile &Obj) override { - LLVM_DEBUG(dbgs() << "Allocator " << Id << " applied mappings:\n"); - for (auto &ObjAllocs : Unmapped) { - mapAllocsToRemoteAddrs(Dyld, ObjAllocs.CodeAllocs, - ObjAllocs.RemoteCodeAddr); - mapAllocsToRemoteAddrs(Dyld, ObjAllocs.RODataAllocs, - ObjAllocs.RemoteRODataAddr); - mapAllocsToRemoteAddrs(Dyld, ObjAllocs.RWDataAllocs, - ObjAllocs.RemoteRWDataAddr); - Unfinalized.push_back(std::move(ObjAllocs)); - } - Unmapped.clear(); - } - - bool finalizeMemory(std::string *ErrMsg = nullptr) override { - LLVM_DEBUG(dbgs() << "Allocator " << Id << " finalizing:\n"); - - for (auto &ObjAllocs : Unfinalized) { - if (copyAndProtect(ObjAllocs.CodeAllocs, ObjAllocs.RemoteCodeAddr, - sys::Memory::MF_READ | sys::Memory::MF_EXEC)) - return true; - - if (copyAndProtect(ObjAllocs.RODataAllocs, ObjAllocs.RemoteRODataAddr, - sys::Memory::MF_READ)) - return true; - - if (copyAndProtect(ObjAllocs.RWDataAllocs, ObjAllocs.RemoteRWDataAddr, - sys::Memory::MF_READ | sys::Memory::MF_WRITE)) - return true; - } - Unfinalized.clear(); - - for (auto &EHFrame : UnfinalizedEHFrames) { - if (auto Err = Client.registerEHFrames(EHFrame.Addr, EHFrame.Size)) { - // FIXME: Replace this once finalizeMemory can return an Error. 
- handleAllErrors(std::move(Err), [&](ErrorInfoBase &EIB) { - if (ErrMsg) { - raw_string_ostream ErrOut(*ErrMsg); - EIB.log(ErrOut); - } - }); - return false; - } - } - RegisteredEHFrames = std::move(UnfinalizedEHFrames); - UnfinalizedEHFrames = {}; - - return false; - } - - private: - class Alloc { - public: - Alloc(uint64_t Size, unsigned Align) - : Size(Size), Align(Align), Contents(new char[Size + Align - 1]) {} - - Alloc(const Alloc &) = delete; - Alloc &operator=(const Alloc &) = delete; - Alloc(Alloc &&) = default; - Alloc &operator=(Alloc &&) = default; - - uint64_t getSize() const { return Size; } - - unsigned getAlign() const { return Align; } - - char *getLocalAddress() const { - uintptr_t LocalAddr = reinterpret_cast(Contents.get()); - LocalAddr = alignTo(LocalAddr, Align); - return reinterpret_cast(LocalAddr); - } - - void setRemoteAddress(JITTargetAddress RemoteAddr) { - this->RemoteAddr = RemoteAddr; - } - - JITTargetAddress getRemoteAddress() const { return RemoteAddr; } - - private: - uint64_t Size; - unsigned Align; - std::unique_ptr Contents; - JITTargetAddress RemoteAddr = 0; - }; - - struct ObjectAllocs { - ObjectAllocs() = default; - ObjectAllocs(const ObjectAllocs &) = delete; - ObjectAllocs &operator=(const ObjectAllocs &) = delete; - ObjectAllocs(ObjectAllocs &&) = default; - ObjectAllocs &operator=(ObjectAllocs &&) = default; - - JITTargetAddress RemoteCodeAddr = 0; - JITTargetAddress RemoteRODataAddr = 0; - JITTargetAddress RemoteRWDataAddr = 0; - std::vector CodeAllocs, RODataAllocs, RWDataAllocs; - }; - - RemoteRTDyldMemoryManager(OrcRemoteTargetClient &Client, - ResourceIdMgr::ResourceId Id) - : Client(Client), Id(Id) { - LLVM_DEBUG(dbgs() << "Created remote allocator " << Id << "\n"); - } - - // Maps all allocations in Allocs to aligned blocks - void mapAllocsToRemoteAddrs(RuntimeDyld &Dyld, std::vector &Allocs, - JITTargetAddress NextAddr) { - for (auto &Alloc : Allocs) { - NextAddr = alignTo(NextAddr, Alloc.getAlign()); - Dyld.mapSectionAddress(Alloc.getLocalAddress(), NextAddr); - LLVM_DEBUG( - dbgs() << " " << static_cast(Alloc.getLocalAddress()) - << " -> " << format("0x%016" PRIx64, NextAddr) << "\n"); - Alloc.setRemoteAddress(NextAddr); - - // Only advance NextAddr if it was non-null to begin with, - // otherwise leave it as null. - if (NextAddr) - NextAddr += Alloc.getSize(); - } - } - - // Copies data for each alloc in the list, then set permissions on the - // segment. - bool copyAndProtect(const std::vector &Allocs, - JITTargetAddress RemoteSegmentAddr, - unsigned Permissions) { - if (RemoteSegmentAddr) { - assert(!Allocs.empty() && "No sections in allocated segment"); - - for (auto &Alloc : Allocs) { - LLVM_DEBUG(dbgs() << " copying section: " - << static_cast(Alloc.getLocalAddress()) - << " -> " - << format("0x%016" PRIx64, Alloc.getRemoteAddress()) - << " (" << Alloc.getSize() << " bytes)\n";); - - if (Client.writeMem(Alloc.getRemoteAddress(), Alloc.getLocalAddress(), - Alloc.getSize())) - return true; - } - - LLVM_DEBUG(dbgs() << " setting " - << (Permissions & sys::Memory::MF_READ ? 'R' : '-') - << (Permissions & sys::Memory::MF_WRITE ? 'W' : '-') - << (Permissions & sys::Memory::MF_EXEC ? 
'X' : '-') - << " permissions on block: " - << format("0x%016" PRIx64, RemoteSegmentAddr) - << "\n"); - if (Client.setProtections(Id, RemoteSegmentAddr, Permissions)) - return true; - } - return false; - } - - OrcRemoteTargetClient &Client; - ResourceIdMgr::ResourceId Id; - std::vector Unmapped; - std::vector Unfinalized; - - struct EHFrame { - JITTargetAddress Addr; - uint64_t Size; - }; - std::vector UnfinalizedEHFrames; - std::vector RegisteredEHFrames; - }; - - class RPCMMAlloc : public jitlink::JITLinkMemoryManager::Allocation { - using AllocationMap = DenseMap; - using FinalizeContinuation = - jitlink::JITLinkMemoryManager::Allocation::FinalizeContinuation; - using ProtectionFlags = sys::Memory::ProtectionFlags; - using SegmentsRequestMap = - DenseMap; - - RPCMMAlloc(OrcRemoteTargetClient &Client, ResourceIdMgr::ResourceId Id) - : Client(Client), Id(Id) {} - - public: - static Expected> - Create(OrcRemoteTargetClient &Client, ResourceIdMgr::ResourceId Id, - const SegmentsRequestMap &Request) { - auto *MM = new RPCMMAlloc(Client, Id); - - if (Error Err = MM->allocateHostBlocks(Request)) - return std::move(Err); - - if (Error Err = MM->allocateTargetBlocks()) - return std::move(Err); - - return std::unique_ptr(MM); - } - - MutableArrayRef getWorkingMemory(ProtectionFlags Seg) override { - assert(HostSegBlocks.count(Seg) && "No allocation for segment"); - return {static_cast(HostSegBlocks[Seg].base()), - HostSegBlocks[Seg].allocatedSize()}; - } - - JITTargetAddress getTargetMemory(ProtectionFlags Seg) override { - assert(TargetSegBlocks.count(Seg) && "No allocation for segment"); - return pointerToJITTargetAddress(TargetSegBlocks[Seg].base()); - } - - void finalizeAsync(FinalizeContinuation OnFinalize) override { - // Host allocations (working memory) remain ReadWrite. - OnFinalize(copyAndProtect()); - } - - Error deallocate() override { - // TODO: Cannot release target allocation. RPCAPI has no function - // symmetric to reserveMem(). Add RPC call like freeMem()? - return errorCodeToError(sys::Memory::releaseMappedMemory(HostAllocation)); - } - - private: - OrcRemoteTargetClient &Client; - ResourceIdMgr::ResourceId Id; - AllocationMap HostSegBlocks; - AllocationMap TargetSegBlocks; - JITTargetAddress TargetSegmentAddr; - sys::MemoryBlock HostAllocation; - - Error allocateHostBlocks(const SegmentsRequestMap &Request) { - unsigned TargetPageSize = Client.getPageSize(); - - if (!isPowerOf2_64(static_cast(TargetPageSize))) - return make_error("Host page size is not a power of 2", - inconvertibleErrorCode()); - - auto TotalSize = calcTotalAllocSize(Request, TargetPageSize); - if (!TotalSize) - return TotalSize.takeError(); - - // Allocate one slab to cover all the segments. - const sys::Memory::ProtectionFlags ReadWrite = - static_cast(sys::Memory::MF_READ | - sys::Memory::MF_WRITE); - std::error_code EC; - HostAllocation = - sys::Memory::allocateMappedMemory(*TotalSize, nullptr, ReadWrite, EC); - if (EC) - return errorCodeToError(EC); - - char *SlabAddr = static_cast(HostAllocation.base()); -#ifndef NDEBUG - char *SlabAddrEnd = SlabAddr + HostAllocation.allocatedSize(); -#endif - - // Allocate segment memory from the slab. - for (auto &KV : Request) { - const auto &Seg = KV.second; - - uint64_t SegmentSize = Seg.getContentSize() + Seg.getZeroFillSize(); - uint64_t AlignedSegmentSize = alignTo(SegmentSize, TargetPageSize); - - // Zero out zero-fill memory. 
- char *ZeroFillBegin = SlabAddr + Seg.getContentSize(); - memset(ZeroFillBegin, 0, Seg.getZeroFillSize()); - - // Record the block for this segment. - HostSegBlocks[KV.first] = - sys::MemoryBlock(SlabAddr, AlignedSegmentSize); - - SlabAddr += AlignedSegmentSize; - assert(SlabAddr <= SlabAddrEnd && "Out of range"); - } - - return Error::success(); - } - - Error allocateTargetBlocks() { - // Reserve memory for all blocks on the target. We need as much space on - // the target as we allocated on the host. - TargetSegmentAddr = Client.reserveMem(Id, HostAllocation.allocatedSize(), - Client.getPageSize()); - if (!TargetSegmentAddr) - return make_error("Failed to reserve memory on the target", - inconvertibleErrorCode()); - - // Map memory blocks into the allocation, that match the host allocation. - JITTargetAddress TargetAllocAddr = TargetSegmentAddr; - for (const auto &KV : HostSegBlocks) { - size_t TargetAllocSize = KV.second.allocatedSize(); - - TargetSegBlocks[KV.first] = - sys::MemoryBlock(jitTargetAddressToPointer(TargetAllocAddr), - TargetAllocSize); - - TargetAllocAddr += TargetAllocSize; - assert(TargetAllocAddr - TargetSegmentAddr <= - HostAllocation.allocatedSize() && - "Out of range on target"); - } - - return Error::success(); - } - - Error copyAndProtect() { - unsigned Permissions = 0u; - - // Copy segments one by one. - for (auto &KV : TargetSegBlocks) { - Permissions |= KV.first; - - const sys::MemoryBlock &TargetBlock = KV.second; - const sys::MemoryBlock &HostBlock = HostSegBlocks.lookup(KV.first); - - size_t TargetAllocSize = TargetBlock.allocatedSize(); - auto TargetAllocAddr = pointerToJITTargetAddress(TargetBlock.base()); - auto *HostAllocBegin = static_cast(HostBlock.base()); - - bool CopyErr = - Client.writeMem(TargetAllocAddr, HostAllocBegin, TargetAllocSize); - if (CopyErr) - return createStringError(inconvertibleErrorCode(), - "Failed to copy %d segment to the target", - KV.first); - } - - // Set permission flags for all segments at once. 
- bool ProtectErr = - Client.setProtections(Id, TargetSegmentAddr, Permissions); - if (ProtectErr) - return createStringError(inconvertibleErrorCode(), - "Failed to apply permissions for %d segment " - "on the target", - Permissions); - return Error::success(); - } - - static Expected - calcTotalAllocSize(const SegmentsRequestMap &Request, - unsigned TargetPageSize) { - size_t TotalSize = 0; - for (const auto &KV : Request) { - const auto &Seg = KV.second; - - if (Seg.getAlignment() > TargetPageSize) - return make_error("Cannot request alignment higher than " - "page alignment on target", - inconvertibleErrorCode()); - - TotalSize = alignTo(TotalSize, TargetPageSize); - TotalSize += Seg.getContentSize(); - TotalSize += Seg.getZeroFillSize(); - } - - return TotalSize; - } - }; - - class RemoteJITLinkMemoryManager : public jitlink::JITLinkMemoryManager { - public: - RemoteJITLinkMemoryManager(OrcRemoteTargetClient &Client, - ResourceIdMgr::ResourceId Id) - : Client(Client), Id(Id) {} - - RemoteJITLinkMemoryManager(const RemoteJITLinkMemoryManager &) = delete; - RemoteJITLinkMemoryManager(RemoteJITLinkMemoryManager &&) = default; - - RemoteJITLinkMemoryManager & - operator=(const RemoteJITLinkMemoryManager &) = delete; - RemoteJITLinkMemoryManager & - operator=(RemoteJITLinkMemoryManager &&) = delete; - - ~RemoteJITLinkMemoryManager() { - Client.destroyRemoteAllocator(Id); - LLVM_DEBUG(dbgs() << "Destroyed remote allocator " << Id << "\n"); - } - - Expected> - allocate(const jitlink::JITLinkDylib *JD, - const SegmentsRequestMap &Request) override { - return RPCMMAlloc::Create(Client, Id, Request); - } - - private: - OrcRemoteTargetClient &Client; - ResourceIdMgr::ResourceId Id; - }; - - /// Remote indirect stubs manager. - class RemoteIndirectStubsManager : public IndirectStubsManager { - public: - RemoteIndirectStubsManager(OrcRemoteTargetClient &Client, - ResourceIdMgr::ResourceId Id) - : Client(Client), Id(Id) {} - - ~RemoteIndirectStubsManager() override { - Client.destroyIndirectStubsManager(Id); - } - - Error createStub(StringRef StubName, JITTargetAddress StubAddr, - JITSymbolFlags StubFlags) override { - if (auto Err = reserveStubs(1)) - return Err; - - return createStubInternal(StubName, StubAddr, StubFlags); - } - - Error createStubs(const StubInitsMap &StubInits) override { - if (auto Err = reserveStubs(StubInits.size())) - return Err; - - for (auto &Entry : StubInits) - if (auto Err = createStubInternal(Entry.first(), Entry.second.first, - Entry.second.second)) - return Err; - - return Error::success(); - } - - JITEvaluatedSymbol findStub(StringRef Name, bool ExportedStubsOnly) override { - auto I = StubIndexes.find(Name); - if (I == StubIndexes.end()) - return nullptr; - auto Key = I->second.first; - auto Flags = I->second.second; - auto StubSymbol = JITEvaluatedSymbol(getStubAddr(Key), Flags); - if (ExportedStubsOnly && !StubSymbol.getFlags().isExported()) - return nullptr; - return StubSymbol; - } - - JITEvaluatedSymbol findPointer(StringRef Name) override { - auto I = StubIndexes.find(Name); - if (I == StubIndexes.end()) - return nullptr; - auto Key = I->second.first; - auto Flags = I->second.second; - return JITEvaluatedSymbol(getPtrAddr(Key), Flags); - } - - Error updatePointer(StringRef Name, JITTargetAddress NewAddr) override { - auto I = StubIndexes.find(Name); - assert(I != StubIndexes.end() && "No stub pointer for symbol"); - auto Key = I->second.first; - return Client.writePointer(getPtrAddr(Key), NewAddr); - } - - private: - struct RemoteIndirectStubsInfo { - 
JITTargetAddress StubBase; - JITTargetAddress PtrBase; - unsigned NumStubs; - }; - - using StubKey = std::pair; - - Error reserveStubs(unsigned NumStubs) { - if (NumStubs <= FreeStubs.size()) - return Error::success(); - - unsigned NewStubsRequired = NumStubs - FreeStubs.size(); - JITTargetAddress StubBase; - JITTargetAddress PtrBase; - unsigned NumStubsEmitted; - - if (auto StubInfoOrErr = Client.emitIndirectStubs(Id, NewStubsRequired)) - std::tie(StubBase, PtrBase, NumStubsEmitted) = *StubInfoOrErr; - else - return StubInfoOrErr.takeError(); - - unsigned NewBlockId = RemoteIndirectStubsInfos.size(); - RemoteIndirectStubsInfos.push_back({StubBase, PtrBase, NumStubsEmitted}); - - for (unsigned I = 0; I < NumStubsEmitted; ++I) - FreeStubs.push_back(std::make_pair(NewBlockId, I)); - - return Error::success(); - } - - Error createStubInternal(StringRef StubName, JITTargetAddress InitAddr, - JITSymbolFlags StubFlags) { - auto Key = FreeStubs.back(); - FreeStubs.pop_back(); - StubIndexes[StubName] = std::make_pair(Key, StubFlags); - return Client.writePointer(getPtrAddr(Key), InitAddr); - } - - JITTargetAddress getStubAddr(StubKey K) { - assert(RemoteIndirectStubsInfos[K.first].StubBase != 0 && - "Missing stub address"); - return RemoteIndirectStubsInfos[K.first].StubBase + - K.second * Client.getIndirectStubSize(); - } - - JITTargetAddress getPtrAddr(StubKey K) { - assert(RemoteIndirectStubsInfos[K.first].PtrBase != 0 && - "Missing pointer address"); - return RemoteIndirectStubsInfos[K.first].PtrBase + - K.second * Client.getPointerSize(); - } - - OrcRemoteTargetClient &Client; - ResourceIdMgr::ResourceId Id; - std::vector RemoteIndirectStubsInfos; - std::vector FreeStubs; - StringMap> StubIndexes; - }; - - class RemoteTrampolinePool : public TrampolinePool { - public: - RemoteTrampolinePool(OrcRemoteTargetClient &Client) : Client(Client) {} - - private: - Error grow() override { - JITTargetAddress BlockAddr = 0; - uint32_t NumTrampolines = 0; - if (auto TrampolineInfoOrErr = Client.emitTrampolineBlock()) - std::tie(BlockAddr, NumTrampolines) = *TrampolineInfoOrErr; - else - return TrampolineInfoOrErr.takeError(); - - uint32_t TrampolineSize = Client.getTrampolineSize(); - for (unsigned I = 0; I < NumTrampolines; ++I) - AvailableTrampolines.push_back(BlockAddr + (I * TrampolineSize)); - - return Error::success(); - } - - OrcRemoteTargetClient &Client; - }; - - /// Remote compile callback manager. - class RemoteCompileCallbackManager : public JITCompileCallbackManager { - public: - RemoteCompileCallbackManager(OrcRemoteTargetClient &Client, - ExecutionSession &ES, - JITTargetAddress ErrorHandlerAddress) - : JITCompileCallbackManager( - std::make_unique(Client), ES, - ErrorHandlerAddress) {} - }; - - /// Create an OrcRemoteTargetClient. - /// Channel is the ChannelT instance to communicate on. It is assumed that - /// the channel is ready to be read from and written to. - static Expected> - Create(shared::RawByteChannel &Channel, ExecutionSession &ES) { - Error Err = Error::success(); - auto Client = std::unique_ptr( - new OrcRemoteTargetClient(Channel, ES, Err)); - if (Err) - return std::move(Err); - return std::move(Client); - } - - /// Call the int(void) function at the given address in the target and return - /// its result. - Expected callIntVoid(JITTargetAddress Addr) { - LLVM_DEBUG(dbgs() << "Calling int(*)(void) " - << format("0x%016" PRIx64, Addr) << "\n"); - return callB(Addr); - } - - /// Call the int(int) function at the given address in the target and return - /// its result. 
- Expected callIntInt(JITTargetAddress Addr, int Arg) { - LLVM_DEBUG(dbgs() << "Calling int(*)(int) " << format("0x%016" PRIx64, Addr) - << "\n"); - return callB(Addr, Arg); - } - - /// Call the int(int, char*[]) function at the given address in the target and - /// return its result. - Expected callMain(JITTargetAddress Addr, - const std::vector &Args) { - LLVM_DEBUG(dbgs() << "Calling int(*)(int, char*[]) " - << format("0x%016" PRIx64, Addr) << "\n"); - return callB(Addr, Args); - } - - /// Call the void() function at the given address in the target and wait for - /// it to finish. - Error callVoidVoid(JITTargetAddress Addr) { - LLVM_DEBUG(dbgs() << "Calling void(*)(void) " - << format("0x%016" PRIx64, Addr) << "\n"); - return callB(Addr); - } - - /// Create an RCMemoryManager which will allocate its memory on the remote - /// target. - Expected> - createRemoteMemoryManager() { - auto Id = AllocatorIds.getNext(); - if (auto Err = callB(Id)) - return std::move(Err); - return std::unique_ptr( - new RemoteRTDyldMemoryManager(*this, Id)); - } - - /// Create a JITLink-compatible memory manager which will allocate working - /// memory on the host and target memory on the remote target. - Expected> - createRemoteJITLinkMemoryManager() { - auto Id = AllocatorIds.getNext(); - if (auto Err = callB(Id)) - return std::move(Err); - LLVM_DEBUG(dbgs() << "Created remote allocator " << Id << "\n"); - return std::unique_ptr( - new RemoteJITLinkMemoryManager(*this, Id)); - } - - /// Create an RCIndirectStubsManager that will allocate stubs on the remote - /// target. - Expected> - createIndirectStubsManager() { - auto Id = IndirectStubOwnerIds.getNext(); - if (auto Err = callB(Id)) - return std::move(Err); - return std::make_unique(*this, Id); - } - - Expected - enableCompileCallbacks(JITTargetAddress ErrorHandlerAddress) { - assert(!CallbackManager && "CallbackManager already obtained"); - - // Emit the resolver block on the JIT server. - if (auto Err = callB()) - return std::move(Err); - - // Create the callback manager. - CallbackManager.emplace(*this, ES, ErrorHandlerAddress); - RemoteCompileCallbackManager &Mgr = *CallbackManager; - return Mgr; - } - - /// Search for symbols in the remote process. Note: This should be used by - /// symbol resolvers *after* they've searched the local symbol table in the - /// JIT stack. - Expected getSymbolAddress(StringRef Name) { - return callB(Name); - } - - /// Get the triple for the remote target. - const std::string &getTargetTriple() const { return RemoteTargetTriple; } - - Error terminateSession() { return callB(); } - -private: - OrcRemoteTargetClient(shared::RawByteChannel &Channel, ExecutionSession &ES, - Error &Err) - : shared::SingleThreadedRPCEndpoint(Channel, - true), - ES(ES) { - ErrorAsOutParameter EAO(&Err); - - addHandler( - [this](JITTargetAddress Addr) -> JITTargetAddress { - if (CallbackManager) - return CallbackManager->executeCompileCallback(Addr); - return 0; - }); - - if (auto RIOrErr = callB()) { - std::tie(RemoteTargetTriple, RemotePointerSize, RemotePageSize, - RemoteTrampolineSize, RemoteIndirectStubSize) = *RIOrErr; - Err = Error::success(); - } else - Err = RIOrErr.takeError(); - } - - void deregisterEHFrames(JITTargetAddress Addr, uint32_t Size) { - if (auto Err = callB(Addr, Size)) - ES.reportError(std::move(Err)); - } - - void destroyRemoteAllocator(ResourceIdMgr::ResourceId Id) { - if (auto Err = callB(Id)) { - // FIXME: This will be triggered by a removeModuleSet call: Propagate - // error return up through that. 
- llvm_unreachable("Failed to destroy remote allocator."); - AllocatorIds.release(Id); - } - } - - void destroyIndirectStubsManager(ResourceIdMgr::ResourceId Id) { - IndirectStubOwnerIds.release(Id); - if (auto Err = callB(Id)) - ES.reportError(std::move(Err)); - } - - Expected> - emitIndirectStubs(ResourceIdMgr::ResourceId Id, uint32_t NumStubsRequired) { - return callB(Id, NumStubsRequired); - } - - Expected> emitTrampolineBlock() { - return callB(); - } - - uint32_t getIndirectStubSize() const { return RemoteIndirectStubSize; } - uint32_t getPageSize() const { return RemotePageSize; } - uint32_t getPointerSize() const { return RemotePointerSize; } - - uint32_t getTrampolineSize() const { return RemoteTrampolineSize; } - - Expected> readMem(char *Dst, JITTargetAddress Src, - uint64_t Size) { - return callB(Src, Size); - } - - Error registerEHFrames(JITTargetAddress &RAddr, uint32_t Size) { - // FIXME: Duplicate error and report it via ReportError too? - return callB(RAddr, Size); - } - - JITTargetAddress reserveMem(ResourceIdMgr::ResourceId Id, uint64_t Size, - uint32_t Align) { - if (auto AddrOrErr = callB(Id, Size, Align)) - return *AddrOrErr; - else { - ES.reportError(AddrOrErr.takeError()); - return 0; - } - } - - bool setProtections(ResourceIdMgr::ResourceId Id, - JITTargetAddress RemoteSegAddr, unsigned ProtFlags) { - if (auto Err = callB(Id, RemoteSegAddr, ProtFlags)) { - ES.reportError(std::move(Err)); - return true; - } else - return false; - } - - bool writeMem(JITTargetAddress Addr, const char *Src, uint64_t Size) { - if (auto Err = callB(DirectBufferWriter(Src, Addr, Size))) { - ES.reportError(std::move(Err)); - return true; - } else - return false; - } - - Error writePointer(JITTargetAddress Addr, JITTargetAddress PtrVal) { - return callB(Addr, PtrVal); - } - - static Error doNothing() { return Error::success(); } - - ExecutionSession &ES; - std::function ReportError; - std::string RemoteTargetTriple; - uint32_t RemotePointerSize = 0; - uint32_t RemotePageSize = 0; - uint32_t RemoteTrampolineSize = 0; - uint32_t RemoteIndirectStubSize = 0; - ResourceIdMgr AllocatorIds, IndirectStubOwnerIds; - Optional CallbackManager; -}; - -} // end namespace remote -} // end namespace orc -} // end namespace llvm - -#undef DEBUG_TYPE - -#endif // LLVM_EXECUTIONENGINE_ORC_ORCREMOTETARGETCLIENT_H diff --git a/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h b/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h deleted file mode 100644 index 367bfb369191..000000000000 --- a/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h +++ /dev/null @@ -1,386 +0,0 @@ -//===- OrcRemoteTargetRPCAPI.h - Orc Remote-target RPC API ------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines the Orc remote-target RPC API. It should not be used -// directly, but is used by the RemoteTargetClient and RemoteTargetServer -// classes. 
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_EXECUTIONENGINE_ORC_ORCREMOTETARGETRPCAPI_H -#define LLVM_EXECUTIONENGINE_ORC_ORCREMOTETARGETRPCAPI_H - -#include "llvm/ExecutionEngine/JITSymbol.h" -#include "llvm/ExecutionEngine/Orc/Shared/RPCUtils.h" -#include "llvm/ExecutionEngine/Orc/Shared/RawByteChannel.h" - -namespace llvm { -namespace orc { - -namespace remote { - -/// Template error for missing resources. -template -class ResourceNotFound - : public ErrorInfo> { -public: - static char ID; - - ResourceNotFound(ResourceIdT ResourceId, - std::string ResourceDescription = "") - : ResourceId(std::move(ResourceId)), - ResourceDescription(std::move(ResourceDescription)) {} - - std::error_code convertToErrorCode() const override { - return orcError(OrcErrorCode::UnknownResourceHandle); - } - - void log(raw_ostream &OS) const override { - OS << (ResourceDescription.empty() - ? "Remote resource with id " - : ResourceDescription) - << " " << ResourceId << " not found"; - } - -private: - ResourceIdT ResourceId; - std::string ResourceDescription; -}; - -template -char ResourceNotFound::ID = 0; - -class DirectBufferWriter { -public: - DirectBufferWriter() = default; - DirectBufferWriter(const char *Src, JITTargetAddress Dst, uint64_t Size) - : Src(Src), Dst(Dst), Size(Size) {} - - const char *getSrc() const { return Src; } - JITTargetAddress getDst() const { return Dst; } - uint64_t getSize() const { return Size; } - -private: - const char *Src; - JITTargetAddress Dst; - uint64_t Size; -}; - -} // end namespace remote - -namespace shared { - -template <> class SerializationTypeName { -public: - static const char *getName() { return "JITSymbolFlags"; } -}; - -template -class SerializationTraits { -public: - - static Error serialize(ChannelT &C, const JITSymbolFlags &Flags) { - return serializeSeq(C, Flags.getRawFlagsValue(), Flags.getTargetFlags()); - } - - static Error deserialize(ChannelT &C, JITSymbolFlags &Flags) { - JITSymbolFlags::UnderlyingType JITFlags; - JITSymbolFlags::TargetFlagsType TargetFlags; - if (auto Err = deserializeSeq(C, JITFlags, TargetFlags)) - return Err; - Flags = JITSymbolFlags(static_cast(JITFlags), - TargetFlags); - return Error::success(); - } -}; - -template <> class SerializationTypeName { -public: - static const char *getName() { return "DirectBufferWriter"; } -}; - -template -class SerializationTraits< - ChannelT, remote::DirectBufferWriter, remote::DirectBufferWriter, - std::enable_if_t::value>> { -public: - static Error serialize(ChannelT &C, const remote::DirectBufferWriter &DBW) { - if (auto EC = serializeSeq(C, DBW.getDst())) - return EC; - if (auto EC = serializeSeq(C, DBW.getSize())) - return EC; - return C.appendBytes(DBW.getSrc(), DBW.getSize()); - } - - static Error deserialize(ChannelT &C, remote::DirectBufferWriter &DBW) { - JITTargetAddress Dst; - if (auto EC = deserializeSeq(C, Dst)) - return EC; - uint64_t Size; - if (auto EC = deserializeSeq(C, Size)) - return EC; - char *Addr = reinterpret_cast(static_cast(Dst)); - - DBW = remote::DirectBufferWriter(nullptr, Dst, Size); - - return C.readBytes(Addr, Size); - } -}; - -} // end namespace shared - -namespace remote { - -class ResourceIdMgr { -public: - using ResourceId = uint64_t; - static const ResourceId InvalidId = ~0U; - - ResourceIdMgr() = default; - explicit ResourceIdMgr(ResourceId FirstValidId) - : NextId(std::move(FirstValidId)) {} - - ResourceId getNext() { - if (!FreeIds.empty()) { - ResourceId I = FreeIds.back(); - FreeIds.pop_back(); - 
return I; - } - assert(NextId + 1 != ~0ULL && "All ids allocated"); - return NextId++; - } - - void release(ResourceId I) { FreeIds.push_back(I); } - -private: - ResourceId NextId = 1; - std::vector FreeIds; -}; - -/// Registers EH frames on the remote. -namespace eh { - - /// Registers EH frames on the remote. -class RegisterEHFrames - : public shared::RPCFunction { -public: - static const char *getName() { return "RegisterEHFrames"; } -}; - - /// Deregisters EH frames on the remote. -class DeregisterEHFrames - : public shared::RPCFunction { -public: - static const char *getName() { return "DeregisterEHFrames"; } -}; - -} // end namespace eh - -/// RPC functions for executing remote code. -namespace exec { - - /// Call an 'int32_t()'-type function on the remote, returns the called - /// function's return value. -class CallIntVoid - : public shared::RPCFunction { -public: - static const char *getName() { return "CallIntVoid"; } -}; - - /// Call an 'int32_t(int32_t)'-type function on the remote, returns the called - /// function's return value. -class CallIntInt - : public shared::RPCFunction { -public: - static const char *getName() { return "CallIntInt"; } -}; - - /// Call an 'int32_t(int32_t, char**)'-type function on the remote, returns the - /// called function's return value. -class CallMain - : public shared::RPCFunction Args)> { -public: - static const char *getName() { return "CallMain"; } -}; - - /// Calls a 'void()'-type function on the remote, returns when the called - /// function completes. -class CallVoidVoid - : public shared::RPCFunction { -public: - static const char *getName() { return "CallVoidVoid"; } -}; - -} // end namespace exec - -/// RPC functions for remote memory management / inspection / modification. -namespace mem { - - /// Creates a memory allocator on the remote. -class CreateRemoteAllocator - : public shared::RPCFunction { -public: - static const char *getName() { return "CreateRemoteAllocator"; } -}; - - /// Destroys a remote allocator, freeing any memory allocated by it. -class DestroyRemoteAllocator - : public shared::RPCFunction { -public: - static const char *getName() { return "DestroyRemoteAllocator"; } -}; - - /// Read a remote memory block. -class ReadMem - : public shared::RPCFunction< - ReadMem, std::vector(JITTargetAddress Src, uint64_t Size)> { -public: - static const char *getName() { return "ReadMem"; } -}; - - /// Reserve a block of memory on the remote via the given allocator. -class ReserveMem - : public shared::RPCFunction< - ReserveMem, JITTargetAddress(ResourceIdMgr::ResourceId AllocID, - uint64_t Size, uint32_t Align)> { -public: - static const char *getName() { return "ReserveMem"; } -}; - - /// Set the memory protection on a memory block. -class SetProtections - : public shared::RPCFunction< - SetProtections, void(ResourceIdMgr::ResourceId AllocID, - JITTargetAddress Dst, uint32_t ProtFlags)> { -public: - static const char *getName() { return "SetProtections"; } -}; - - /// Write to a remote memory block. -class WriteMem - : public shared::RPCFunction { -public: - static const char *getName() { return "WriteMem"; } -}; - - /// Write to a remote pointer. -class WritePtr - : public shared::RPCFunction { -public: - static const char *getName() { return "WritePtr"; } -}; - -} // end namespace mem - -/// RPC functions for remote stub and trampoline management. -namespace stubs { - - /// Creates an indirect stub owner on the remote. 
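In several of the declarations above, the template argument lists appear to have been dropped during extraction (for example, the element type of the std::vector returned by ReadMem). As a hedged sketch only, the general shape of one of these declarations would be as follows; the uint8_t element type is an assumption inferred from the byte-oriented ReadMem/WriteMem API, not text recovered from the patch:

// Sketch of the declaration pattern used throughout this header: each RPC
// function is a class deriving from shared::RPCFunction<Derived, Signature>
// and exposing a stable wire name via getName(). The <uint8_t> element type
// below is an assumption, not recovered text.
class ReadMem
    : public shared::RPCFunction<
          ReadMem, std::vector<uint8_t>(JITTargetAddress Src, uint64_t Size)> {
public:
  static const char *getName() { return "ReadMem"; }
};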
-class CreateIndirectStubsOwner - : public shared::RPCFunction { -public: - static const char *getName() { return "CreateIndirectStubsOwner"; } -}; - - /// RPC function for destroying an indirect stubs owner. -class DestroyIndirectStubsOwner - : public shared::RPCFunction { -public: - static const char *getName() { return "DestroyIndirectStubsOwner"; } -}; - - /// EmitIndirectStubs result is (StubsBase, PtrsBase, NumStubsEmitted). -class EmitIndirectStubs - : public shared::RPCFunction< - EmitIndirectStubs, - std::tuple( - ResourceIdMgr::ResourceId StubsOwnerID, - uint32_t NumStubsRequired)> { -public: - static const char *getName() { return "EmitIndirectStubs"; } -}; - - /// RPC function to emit the resolver block and return its address. -class EmitResolverBlock - : public shared::RPCFunction { -public: - static const char *getName() { return "EmitResolverBlock"; } -}; - - /// EmitTrampolineBlock result is (BlockAddr, NumTrampolines). -class EmitTrampolineBlock - : public shared::RPCFunction()> { -public: - static const char *getName() { return "EmitTrampolineBlock"; } -}; - -} // end namespace stubs - -/// Miscelaneous RPC functions for dealing with remotes. -namespace utils { - - /// GetRemoteInfo result is (Triple, PointerSize, PageSize, TrampolineSize, - /// IndirectStubsSize). -class GetRemoteInfo - : public shared::RPCFunction< - GetRemoteInfo, - std::tuple()> { -public: - static const char *getName() { return "GetRemoteInfo"; } -}; - - /// Get the address of a remote symbol. -class GetSymbolAddress - : public shared::RPCFunction { -public: - static const char *getName() { return "GetSymbolAddress"; } -}; - - /// Request that the host execute a compile callback. -class RequestCompile - : public shared::RPCFunction< - RequestCompile, JITTargetAddress(JITTargetAddress TrampolineAddr)> { -public: - static const char *getName() { return "RequestCompile"; } -}; - - /// Notify the remote and terminate the session. -class TerminateSession : public shared::RPCFunction { -public: - static const char *getName() { return "TerminateSession"; } -}; - -} // namespace utils - -class OrcRemoteTargetRPCAPI - : public shared::SingleThreadedRPCEndpoint { -public: - // FIXME: Remove constructors once MSVC supports synthesizing move-ops. - OrcRemoteTargetRPCAPI(shared::RawByteChannel &C) - : shared::SingleThreadedRPCEndpoint(C, true) {} -}; - -} // end namespace remote - -} // end namespace orc -} // end namespace llvm - -#endif // LLVM_EXECUTIONENGINE_ORC_ORCREMOTETARGETRPCAPI_H diff --git a/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetServer.h b/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetServer.h deleted file mode 100644 index ce9bf064303d..000000000000 --- a/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetServer.h +++ /dev/null @@ -1,464 +0,0 @@ -//===- OrcRemoteTargetServer.h - Orc Remote-target Server -------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines the OrcRemoteTargetServer class. It can be used to build a -// JIT server that can execute code sent from an OrcRemoteTargetClient. 
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_EXECUTIONENGINE_ORC_ORCREMOTETARGETSERVER_H -#define LLVM_EXECUTIONENGINE_ORC_ORCREMOTETARGETSERVER_H - -#include "llvm/ExecutionEngine/JITSymbol.h" -#include "llvm/ExecutionEngine/Orc/IndirectionUtils.h" -#include "llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h" -#include "llvm/ExecutionEngine/Orc/Shared/OrcError.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/Error.h" -#include "llvm/Support/Format.h" -#include "llvm/Support/Host.h" -#include "llvm/Support/Memory.h" -#include "llvm/Support/Process.h" -#include "llvm/Support/raw_ostream.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define DEBUG_TYPE "orc-remote" - -namespace llvm { -namespace orc { -namespace remote { - -template -class OrcRemoteTargetServer - : public shared::SingleThreadedRPCEndpoint { -public: - using SymbolLookupFtor = - std::function; - - using EHFrameRegistrationFtor = - std::function; - - OrcRemoteTargetServer(ChannelT &Channel, SymbolLookupFtor SymbolLookup, - EHFrameRegistrationFtor EHFramesRegister, - EHFrameRegistrationFtor EHFramesDeregister) - : shared::SingleThreadedRPCEndpoint(Channel, - true), - SymbolLookup(std::move(SymbolLookup)), - EHFramesRegister(std::move(EHFramesRegister)), - EHFramesDeregister(std::move(EHFramesDeregister)) { - using ThisT = std::remove_reference_t; - addHandler(*this, &ThisT::handleCallIntVoid); - addHandler(*this, &ThisT::handleCallIntInt); - addHandler(*this, &ThisT::handleCallMain); - addHandler(*this, &ThisT::handleCallVoidVoid); - addHandler(*this, - &ThisT::handleCreateRemoteAllocator); - addHandler( - *this, &ThisT::handleDestroyRemoteAllocator); - addHandler(*this, &ThisT::handleReadMem); - addHandler(*this, &ThisT::handleReserveMem); - addHandler(*this, &ThisT::handleSetProtections); - addHandler(*this, &ThisT::handleWriteMem); - addHandler(*this, &ThisT::handleWritePtr); - addHandler(*this, &ThisT::handleRegisterEHFrames); - addHandler(*this, &ThisT::handleDeregisterEHFrames); - addHandler( - *this, &ThisT::handleCreateIndirectStubsOwner); - addHandler( - *this, &ThisT::handleDestroyIndirectStubsOwner); - addHandler(*this, - &ThisT::handleEmitIndirectStubs); - addHandler(*this, - &ThisT::handleEmitResolverBlock); - addHandler(*this, - &ThisT::handleEmitTrampolineBlock); - addHandler(*this, &ThisT::handleGetSymbolAddress); - addHandler(*this, &ThisT::handleGetRemoteInfo); - addHandler(*this, &ThisT::handleTerminateSession); - } - - // FIXME: Remove move/copy ops once MSVC supports synthesizing move ops. 
- OrcRemoteTargetServer(const OrcRemoteTargetServer &) = delete; - OrcRemoteTargetServer &operator=(const OrcRemoteTargetServer &) = delete; - - OrcRemoteTargetServer(OrcRemoteTargetServer &&Other) = default; - OrcRemoteTargetServer &operator=(OrcRemoteTargetServer &&) = delete; - - Expected requestCompile(JITTargetAddress TrampolineAddr) { - return callB(TrampolineAddr); - } - - bool receivedTerminate() const { return TerminateFlag; } - -private: - struct Allocator { - Allocator() = default; - Allocator(Allocator &&Other) : Allocs(std::move(Other.Allocs)) {} - - Allocator &operator=(Allocator &&Other) { - Allocs = std::move(Other.Allocs); - return *this; - } - - ~Allocator() { - for (auto &Alloc : Allocs) - sys::Memory::releaseMappedMemory(Alloc.second); - } - - Error allocate(void *&Addr, size_t Size, uint32_t Align) { - std::error_code EC; - sys::MemoryBlock MB = sys::Memory::allocateMappedMemory( - Size, nullptr, sys::Memory::MF_READ | sys::Memory::MF_WRITE, EC); - if (EC) - return errorCodeToError(EC); - - Addr = MB.base(); - assert(Allocs.find(MB.base()) == Allocs.end() && "Duplicate alloc"); - Allocs[MB.base()] = std::move(MB); - return Error::success(); - } - - Error setProtections(void *block, unsigned Flags) { - auto I = Allocs.find(block); - if (I == Allocs.end()) - return errorCodeToError(orcError(OrcErrorCode::RemoteMProtectAddrUnrecognized)); - return errorCodeToError( - sys::Memory::protectMappedMemory(I->second, Flags)); - } - - private: - std::map Allocs; - }; - - static Error doNothing() { return Error::success(); } - - static JITTargetAddress reenter(void *JITTargetAddr, void *TrampolineAddr) { - auto T = static_cast(JITTargetAddr); - auto AddrOrErr = T->requestCompile(static_cast( - reinterpret_cast(TrampolineAddr))); - // FIXME: Allow customizable failure substitution functions. 
- assert(AddrOrErr && "Compile request failed"); - return *AddrOrErr; - } - - Expected handleCallIntVoid(JITTargetAddress Addr) { - using IntVoidFnTy = int (*)(); - - IntVoidFnTy Fn = - reinterpret_cast(static_cast(Addr)); - - LLVM_DEBUG(dbgs() << " Calling " << format("0x%016x", Addr) << "\n"); - int Result = Fn(); - LLVM_DEBUG(dbgs() << " Result = " << Result << "\n"); - - return Result; - } - - Expected handleCallIntInt(JITTargetAddress Addr, int Arg) { - using IntIntFnTy = int (*)(int); - - IntIntFnTy Fn = reinterpret_cast(static_cast(Addr)); - - LLVM_DEBUG(dbgs() << " Calling " << format("0x%016x", Addr) - << " with argument " << Arg << "\n"); - int Result = Fn(Arg); - LLVM_DEBUG(dbgs() << " Result = " << Result << "\n"); - - return Result; - } - - Expected handleCallMain(JITTargetAddress Addr, - std::vector Args) { - using MainFnTy = int (*)(int, const char *[]); - - MainFnTy Fn = reinterpret_cast(static_cast(Addr)); - int ArgC = Args.size() + 1; - int Idx = 1; - std::unique_ptr ArgV(new const char *[ArgC + 1]); - ArgV[0] = ""; - for (auto &Arg : Args) - ArgV[Idx++] = Arg.c_str(); - ArgV[ArgC] = 0; - LLVM_DEBUG(for (int Idx = 0; Idx < ArgC; ++Idx) { - llvm::dbgs() << "Arg " << Idx << ": " << ArgV[Idx] << "\n"; - }); - - LLVM_DEBUG(dbgs() << " Calling " << format("0x%016x", Addr) << "\n"); - int Result = Fn(ArgC, ArgV.get()); - LLVM_DEBUG(dbgs() << " Result = " << Result << "\n"); - - return Result; - } - - Error handleCallVoidVoid(JITTargetAddress Addr) { - using VoidVoidFnTy = void (*)(); - - VoidVoidFnTy Fn = - reinterpret_cast(static_cast(Addr)); - - LLVM_DEBUG(dbgs() << " Calling " << format("0x%016x", Addr) << "\n"); - Fn(); - LLVM_DEBUG(dbgs() << " Complete.\n"); - - return Error::success(); - } - - Error handleCreateRemoteAllocator(ResourceIdMgr::ResourceId Id) { - auto I = Allocators.find(Id); - if (I != Allocators.end()) - return errorCodeToError( - orcError(OrcErrorCode::RemoteAllocatorIdAlreadyInUse)); - LLVM_DEBUG(dbgs() << " Created allocator " << Id << "\n"); - Allocators[Id] = Allocator(); - return Error::success(); - } - - Error handleCreateIndirectStubsOwner(ResourceIdMgr::ResourceId Id) { - auto I = IndirectStubsOwners.find(Id); - if (I != IndirectStubsOwners.end()) - return errorCodeToError( - orcError(OrcErrorCode::RemoteIndirectStubsOwnerIdAlreadyInUse)); - LLVM_DEBUG(dbgs() << " Create indirect stubs owner " << Id << "\n"); - IndirectStubsOwners[Id] = ISBlockOwnerList(); - return Error::success(); - } - - Error handleDeregisterEHFrames(JITTargetAddress TAddr, uint32_t Size) { - uint8_t *Addr = reinterpret_cast(static_cast(TAddr)); - LLVM_DEBUG(dbgs() << " Registering EH frames at " - << format("0x%016x", TAddr) << ", Size = " << Size - << " bytes\n"); - EHFramesDeregister(Addr, Size); - return Error::success(); - } - - Error handleDestroyRemoteAllocator(ResourceIdMgr::ResourceId Id) { - auto I = Allocators.find(Id); - if (I == Allocators.end()) - return errorCodeToError( - orcError(OrcErrorCode::RemoteAllocatorDoesNotExist)); - Allocators.erase(I); - LLVM_DEBUG(dbgs() << " Destroyed allocator " << Id << "\n"); - return Error::success(); - } - - Error handleDestroyIndirectStubsOwner(ResourceIdMgr::ResourceId Id) { - auto I = IndirectStubsOwners.find(Id); - if (I == IndirectStubsOwners.end()) - return errorCodeToError( - orcError(OrcErrorCode::RemoteIndirectStubsOwnerDoesNotExist)); - IndirectStubsOwners.erase(I); - return Error::success(); - } - - Expected> - handleEmitIndirectStubs(ResourceIdMgr::ResourceId Id, - uint32_t NumStubsRequired) { - LLVM_DEBUG(dbgs() 
<< " ISMgr " << Id << " request " << NumStubsRequired - << " stubs.\n"); - - auto StubOwnerItr = IndirectStubsOwners.find(Id); - if (StubOwnerItr == IndirectStubsOwners.end()) - return errorCodeToError( - orcError(OrcErrorCode::RemoteIndirectStubsOwnerDoesNotExist)); - - auto IS = LocalIndirectStubsInfo::create( - NumStubsRequired, sys::Process::getPageSizeEstimate()); - if (!IS) - return IS.takeError(); - - JITTargetAddress StubsBase = pointerToJITTargetAddress(IS->getStub(0)); - JITTargetAddress PtrsBase = pointerToJITTargetAddress(IS->getPtr(0)); - uint32_t NumStubsEmitted = IS->getNumStubs(); - - auto &BlockList = StubOwnerItr->second; - BlockList.push_back(std::move(*IS)); - - return std::make_tuple(StubsBase, PtrsBase, NumStubsEmitted); - } - - Error handleEmitResolverBlock() { - std::error_code EC; - ResolverBlock = sys::OwningMemoryBlock(sys::Memory::allocateMappedMemory( - TargetT::ResolverCodeSize, nullptr, - sys::Memory::MF_READ | sys::Memory::MF_WRITE, EC)); - if (EC) - return errorCodeToError(EC); - - TargetT::writeResolverCode(static_cast(ResolverBlock.base()), - pointerToJITTargetAddress(ResolverBlock.base()), - pointerToJITTargetAddress(&reenter), - pointerToJITTargetAddress(this)); - - return errorCodeToError(sys::Memory::protectMappedMemory( - ResolverBlock.getMemoryBlock(), - sys::Memory::MF_READ | sys::Memory::MF_EXEC)); - } - - Expected> handleEmitTrampolineBlock() { - std::error_code EC; - auto TrampolineBlock = - sys::OwningMemoryBlock(sys::Memory::allocateMappedMemory( - sys::Process::getPageSizeEstimate(), nullptr, - sys::Memory::MF_READ | sys::Memory::MF_WRITE, EC)); - if (EC) - return errorCodeToError(EC); - - uint32_t NumTrampolines = - (sys::Process::getPageSizeEstimate() - TargetT::PointerSize) / - TargetT::TrampolineSize; - - char *TrampolineMem = static_cast(TrampolineBlock.base()); - TargetT::writeTrampolines( - TrampolineMem, pointerToJITTargetAddress(TrampolineMem), - pointerToJITTargetAddress(ResolverBlock.base()), NumTrampolines); - - EC = sys::Memory::protectMappedMemory(TrampolineBlock.getMemoryBlock(), - sys::Memory::MF_READ | - sys::Memory::MF_EXEC); - - TrampolineBlocks.push_back(std::move(TrampolineBlock)); - - return std::make_tuple(pointerToJITTargetAddress(TrampolineMem), - NumTrampolines); - } - - Expected handleGetSymbolAddress(const std::string &Name) { - JITTargetAddress Addr = SymbolLookup(Name); - LLVM_DEBUG(dbgs() << " Symbol '" << Name - << "' = " << format("0x%016x", Addr) << "\n"); - return Addr; - } - - Expected> - handleGetRemoteInfo() { - std::string ProcessTriple = sys::getProcessTriple(); - uint32_t PointerSize = TargetT::PointerSize; - uint32_t PageSize = sys::Process::getPageSizeEstimate(); - uint32_t TrampolineSize = TargetT::TrampolineSize; - uint32_t IndirectStubSize = TargetT::StubSize; - LLVM_DEBUG(dbgs() << " Remote info:\n" - << " triple = '" << ProcessTriple << "'\n" - << " pointer size = " << PointerSize << "\n" - << " page size = " << PageSize << "\n" - << " trampoline size = " << TrampolineSize << "\n" - << " indirect stub size = " << IndirectStubSize - << "\n"); - return std::make_tuple(ProcessTriple, PointerSize, PageSize, TrampolineSize, - IndirectStubSize); - } - - Expected> handleReadMem(JITTargetAddress RSrc, - uint64_t Size) { - uint8_t *Src = reinterpret_cast(static_cast(RSrc)); - - LLVM_DEBUG(dbgs() << " Reading " << Size << " bytes from " - << format("0x%016x", RSrc) << "\n"); - - std::vector Buffer; - Buffer.resize(Size); - for (uint8_t *P = Src; Size != 0; --Size) - Buffer.push_back(*P++); - - return 
Buffer; - } - - Error handleRegisterEHFrames(JITTargetAddress TAddr, uint32_t Size) { - uint8_t *Addr = reinterpret_cast(static_cast(TAddr)); - LLVM_DEBUG(dbgs() << " Registering EH frames at " - << format("0x%016x", TAddr) << ", Size = " << Size - << " bytes\n"); - EHFramesRegister(Addr, Size); - return Error::success(); - } - - Expected handleReserveMem(ResourceIdMgr::ResourceId Id, - uint64_t Size, uint32_t Align) { - auto I = Allocators.find(Id); - if (I == Allocators.end()) - return errorCodeToError( - orcError(OrcErrorCode::RemoteAllocatorDoesNotExist)); - auto &Allocator = I->second; - void *LocalAllocAddr = nullptr; - if (auto Err = Allocator.allocate(LocalAllocAddr, Size, Align)) - return std::move(Err); - - LLVM_DEBUG(dbgs() << " Allocator " << Id << " reserved " << LocalAllocAddr - << " (" << Size << " bytes, alignment " << Align - << ")\n"); - - JITTargetAddress AllocAddr = static_cast( - reinterpret_cast(LocalAllocAddr)); - - return AllocAddr; - } - - Error handleSetProtections(ResourceIdMgr::ResourceId Id, - JITTargetAddress Addr, uint32_t Flags) { - auto I = Allocators.find(Id); - if (I == Allocators.end()) - return errorCodeToError( - orcError(OrcErrorCode::RemoteAllocatorDoesNotExist)); - auto &Allocator = I->second; - void *LocalAddr = reinterpret_cast(static_cast(Addr)); - LLVM_DEBUG(dbgs() << " Allocator " << Id << " set permissions on " - << LocalAddr << " to " - << (Flags & sys::Memory::MF_READ ? 'R' : '-') - << (Flags & sys::Memory::MF_WRITE ? 'W' : '-') - << (Flags & sys::Memory::MF_EXEC ? 'X' : '-') << "\n"); - return Allocator.setProtections(LocalAddr, Flags); - } - - Error handleTerminateSession() { - TerminateFlag = true; - return Error::success(); - } - - Error handleWriteMem(DirectBufferWriter DBW) { - LLVM_DEBUG(dbgs() << " Writing " << DBW.getSize() << " bytes to " - << format("0x%016x", DBW.getDst()) << "\n"); - return Error::success(); - } - - Error handleWritePtr(JITTargetAddress Addr, JITTargetAddress PtrVal) { - LLVM_DEBUG(dbgs() << " Writing pointer *" << format("0x%016x", Addr) - << " = " << format("0x%016x", PtrVal) << "\n"); - uintptr_t *Ptr = - reinterpret_cast(static_cast(Addr)); - *Ptr = static_cast(PtrVal); - return Error::success(); - } - - SymbolLookupFtor SymbolLookup; - EHFrameRegistrationFtor EHFramesRegister, EHFramesDeregister; - std::map Allocators; - using ISBlockOwnerList = std::vector>; - std::map IndirectStubsOwners; - sys::OwningMemoryBlock ResolverBlock; - std::vector TrampolineBlocks; - bool TerminateFlag = false; -}; - -} // end namespace remote -} // end namespace orc -} // end namespace llvm - -#undef DEBUG_TYPE - -#endif // LLVM_EXECUTIONENGINE_ORC_ORCREMOTETARGETSERVER_H diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h index 78a6623d7594..3c0b2b9edd52 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h @@ -34,25 +34,26 @@ private: }; /// Represents an address in the executor process. -class ExecutorAddress { +class ExecutorAddr { public: - ExecutorAddress() = default; - explicit ExecutorAddress(uint64_t Addr) : Addr(Addr) {} + ExecutorAddr() = default; - /// Create an ExecutorAddress from the given pointer. + /// Create an ExecutorAddr from the given value. + explicit ExecutorAddr(uint64_t Addr) : Addr(Addr) {} + + /// Create an ExecutorAddr from the given pointer. /// Warning: This should only be used when JITing in-process. 
- template static ExecutorAddress fromPtr(T *Value) { - return ExecutorAddress( + template static ExecutorAddr fromPtr(T *Value) { + return ExecutorAddr( static_cast(reinterpret_cast(Value))); } - /// Cast this ExecutorAddress to a pointer of the given type. - /// Warning: This should only be esude when JITing in-process. + /// Cast this ExecutorAddr to a pointer of the given type. + /// Warning: This should only be used when JITing in-process. template T toPtr() const { static_assert(std::is_pointer::value, "T must be a pointer type"); uintptr_t IntPtr = static_cast(Addr); - assert(IntPtr == Addr && - "JITTargetAddress value out of range for uintptr_t"); + assert(IntPtr == Addr && "ExecutorAddr value out of range for uintptr_t"); return reinterpret_cast(IntPtr); } @@ -62,53 +63,47 @@ public: explicit operator bool() const { return Addr != 0; } - friend bool operator==(const ExecutorAddress &LHS, - const ExecutorAddress &RHS) { + friend bool operator==(const ExecutorAddr &LHS, const ExecutorAddr &RHS) { return LHS.Addr == RHS.Addr; } - friend bool operator!=(const ExecutorAddress &LHS, - const ExecutorAddress &RHS) { + friend bool operator!=(const ExecutorAddr &LHS, const ExecutorAddr &RHS) { return LHS.Addr != RHS.Addr; } - friend bool operator<(const ExecutorAddress &LHS, - const ExecutorAddress &RHS) { + friend bool operator<(const ExecutorAddr &LHS, const ExecutorAddr &RHS) { return LHS.Addr < RHS.Addr; } - friend bool operator<=(const ExecutorAddress &LHS, - const ExecutorAddress &RHS) { + friend bool operator<=(const ExecutorAddr &LHS, const ExecutorAddr &RHS) { return LHS.Addr <= RHS.Addr; } - friend bool operator>(const ExecutorAddress &LHS, - const ExecutorAddress &RHS) { + friend bool operator>(const ExecutorAddr &LHS, const ExecutorAddr &RHS) { return LHS.Addr > RHS.Addr; } - friend bool operator>=(const ExecutorAddress &LHS, - const ExecutorAddress &RHS) { + friend bool operator>=(const ExecutorAddr &LHS, const ExecutorAddr &RHS) { return LHS.Addr >= RHS.Addr; } - ExecutorAddress &operator++() { + ExecutorAddr &operator++() { ++Addr; return *this; } - ExecutorAddress &operator--() { + ExecutorAddr &operator--() { --Addr; return *this; } - ExecutorAddress operator++(int) { return ExecutorAddress(Addr++); } - ExecutorAddress operator--(int) { return ExecutorAddress(Addr++); } + ExecutorAddr operator++(int) { return ExecutorAddr(Addr++); } + ExecutorAddr operator--(int) { return ExecutorAddr(Addr--); } - ExecutorAddress &operator+=(const ExecutorAddrDiff Delta) { + ExecutorAddr &operator+=(const ExecutorAddrDiff Delta) { Addr += Delta.getValue(); return *this; } - ExecutorAddress &operator-=(const ExecutorAddrDiff Delta) { + ExecutorAddr &operator-=(const ExecutorAddrDiff Delta) { Addr -= Delta.getValue(); return *this; } @@ -118,83 +113,98 @@ private: }; /// Subtracting two addresses yields an offset. -inline ExecutorAddrDiff operator-(const ExecutorAddress &LHS, - const ExecutorAddress &RHS) { +inline ExecutorAddrDiff operator-(const ExecutorAddr &LHS, + const ExecutorAddr &RHS) { return ExecutorAddrDiff(LHS.getValue() - RHS.getValue()); } /// Adding an offset and an address yields an address. -inline ExecutorAddress operator+(const ExecutorAddress &LHS, - const ExecutorAddrDiff &RHS) { - return ExecutorAddress(LHS.getValue() + RHS.getValue()); +inline ExecutorAddr operator+(const ExecutorAddr &LHS, + const ExecutorAddrDiff &RHS) { + return ExecutorAddr(LHS.getValue() + RHS.getValue()); } /// Adding an address and an offset yields an address. 
-inline ExecutorAddress operator+(const ExecutorAddrDiff &LHS, - const ExecutorAddress &RHS) { - return ExecutorAddress(LHS.getValue() + RHS.getValue()); +inline ExecutorAddr operator+(const ExecutorAddrDiff &LHS, + const ExecutorAddr &RHS) { + return ExecutorAddr(LHS.getValue() + RHS.getValue()); } /// Represents an address range in the exceutor process. -struct ExecutorAddressRange { - ExecutorAddressRange() = default; - ExecutorAddressRange(ExecutorAddress StartAddress, ExecutorAddress EndAddress) - : StartAddress(StartAddress), EndAddress(EndAddress) {} +struct ExecutorAddrRange { + ExecutorAddrRange() = default; + ExecutorAddrRange(ExecutorAddr Start, ExecutorAddr End) + : Start(Start), End(End) {} + ExecutorAddrRange(ExecutorAddr Start, ExecutorAddrDiff Size) + : Start(Start), End(Start + Size) {} - bool empty() const { return StartAddress == EndAddress; } - ExecutorAddrDiff size() const { return EndAddress - StartAddress; } + bool empty() const { return Start == End; } + ExecutorAddrDiff size() const { return End - Start; } - ExecutorAddress StartAddress; - ExecutorAddress EndAddress; + friend bool operator==(const ExecutorAddrRange &LHS, + const ExecutorAddrRange &RHS) { + return LHS.Start == RHS.Start && LHS.End == RHS.End; + } + friend bool operator!=(const ExecutorAddrRange &LHS, + const ExecutorAddrRange &RHS) { + return !(LHS == RHS); + } + bool contains(ExecutorAddr Addr) const { return Start <= Addr && Addr < End; } + bool overlaps(const ExecutorAddrRange &Other) { + return !(Other.End <= Start || End <= Other.Start); + } + + ExecutorAddr Start; + ExecutorAddr End; }; namespace shared { -/// SPS serializatior for ExecutorAddress. -template <> class SPSSerializationTraits { +class SPSExecutorAddr {}; + +/// SPS serializatior for ExecutorAddr. +template <> class SPSSerializationTraits { public: - static size_t size(const ExecutorAddress &EA) { + static size_t size(const ExecutorAddr &EA) { return SPSArgList::size(EA.getValue()); } - static bool serialize(SPSOutputBuffer &BOB, const ExecutorAddress &EA) { + static bool serialize(SPSOutputBuffer &BOB, const ExecutorAddr &EA) { return SPSArgList::serialize(BOB, EA.getValue()); } - static bool deserialize(SPSInputBuffer &BIB, ExecutorAddress &EA) { + static bool deserialize(SPSInputBuffer &BIB, ExecutorAddr &EA) { uint64_t Tmp; if (!SPSArgList::deserialize(BIB, Tmp)) return false; - EA = ExecutorAddress(Tmp); + EA = ExecutorAddr(Tmp); return true; } }; -using SPSExecutorAddressRange = - SPSTuple; +using SPSExecutorAddrRange = SPSTuple; /// Serialization traits for address ranges. 
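For orientation, a short usage sketch of the renamed ExecutorAddr/ExecutorAddrRange API as it appears in this hunk. It is in-process only (fromPtr/toPtr reinterpret host pointers), and the buffer and function names are illustrative, not part of the patch:

#include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h"

using namespace llvm::orc;

static bool coversBuffer() {
  char Buffer[64]; // illustrative in-process buffer
  ExecutorAddr Start = ExecutorAddr::fromPtr(Buffer);
  ExecutorAddrRange Range(Start, ExecutorAddrDiff(sizeof(Buffer)));

  // Round-trip through the integral representation and back to a pointer.
  char *P = ExecutorAddr(Start.getValue()).toPtr<char *>();

  // contains() uses half-open [Start, End) semantics, so End itself is out.
  return P == Buffer && Range.contains(Start) && !Range.contains(Range.End);
}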
template <> -class SPSSerializationTraits { +class SPSSerializationTraits { public: - static size_t size(const ExecutorAddressRange &Value) { - return SPSArgList::size( - Value.StartAddress, Value.EndAddress); + static size_t size(const ExecutorAddrRange &Value) { + return SPSArgList::size(Value.Start, + Value.End); } - static bool serialize(SPSOutputBuffer &BOB, - const ExecutorAddressRange &Value) { - return SPSArgList::serialize( - BOB, Value.StartAddress, Value.EndAddress); + static bool serialize(SPSOutputBuffer &BOB, const ExecutorAddrRange &Value) { + return SPSArgList::serialize( + BOB, Value.Start, Value.End); } - static bool deserialize(SPSInputBuffer &BIB, ExecutorAddressRange &Value) { - return SPSArgList::deserialize( - BIB, Value.StartAddress, Value.EndAddress); + static bool deserialize(SPSInputBuffer &BIB, ExecutorAddrRange &Value) { + return SPSArgList::deserialize( + BIB, Value.Start, Value.End); } }; -using SPSExecutorAddressRangeSequence = SPSSequence; +using SPSExecutorAddrRangeSequence = SPSSequence; } // End namespace shared. } // End namespace orc. diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Shared/FDRawByteChannel.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/FDRawByteChannel.h deleted file mode 100644 index 3f96fe3da49d..000000000000 --- a/llvm/include/llvm/ExecutionEngine/Orc/Shared/FDRawByteChannel.h +++ /dev/null @@ -1,79 +0,0 @@ -//===- FDRawByteChannel.h - File descriptor based byte-channel -*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// File descriptor based RawByteChannel. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_EXECUTIONENGINE_ORC_SHARED_FDRAWBYTECHANNEL_H -#define LLVM_EXECUTIONENGINE_ORC_SHARED_FDRAWBYTECHANNEL_H - -#include "llvm/ExecutionEngine/Orc/Shared/RawByteChannel.h" - -#if !defined(_MSC_VER) && !defined(__MINGW32__) -#include -#else -#include -#endif - -namespace llvm { -namespace orc { -namespace shared { - -/// Serialization channel that reads from and writes from file descriptors. 
-class FDRawByteChannel final : public RawByteChannel { -public: - FDRawByteChannel(int InFD, int OutFD) : InFD(InFD), OutFD(OutFD) {} - - llvm::Error readBytes(char *Dst, unsigned Size) override { - assert(Dst && "Attempt to read into null."); - ssize_t Completed = 0; - while (Completed < static_cast(Size)) { - ssize_t Read = ::read(InFD, Dst + Completed, Size - Completed); - if (Read <= 0) { - auto ErrNo = errno; - if (ErrNo == EAGAIN || ErrNo == EINTR) - continue; - else - return llvm::errorCodeToError( - std::error_code(errno, std::generic_category())); - } - Completed += Read; - } - return llvm::Error::success(); - } - - llvm::Error appendBytes(const char *Src, unsigned Size) override { - assert(Src && "Attempt to append from null."); - ssize_t Completed = 0; - while (Completed < static_cast(Size)) { - ssize_t Written = ::write(OutFD, Src + Completed, Size - Completed); - if (Written < 0) { - auto ErrNo = errno; - if (ErrNo == EAGAIN || ErrNo == EINTR) - continue; - else - return llvm::errorCodeToError( - std::error_code(errno, std::generic_category())); - } - Completed += Written; - } - return llvm::Error::success(); - } - - llvm::Error send() override { return llvm::Error::success(); } - -private: - int InFD, OutFD; -}; - -} // namespace shared -} // namespace orc -} // namespace llvm - -#endif // LLVM_EXECUTIONENGINE_ORC_SHARED_FDRAWBYTECHANNEL_H diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h new file mode 100644 index 000000000000..3ef43f33d84c --- /dev/null +++ b/llvm/include/llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h @@ -0,0 +1,68 @@ +//===---- OrcRTBridge.h -- Utils for interacting with orc-rt ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Declares types and symbol names provided by the ORC runtime. 
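The FDRawByteChannel::readBytes loop removed above shows a common POSIX pattern: keep calling ::read until the requested number of bytes has arrived, retrying on EINTR/EAGAIN. A minimal standalone sketch of the same pattern, assuming a blocking descriptor; the readAll name is illustrative:

#include <cerrno>
#include <system_error>
#include <unistd.h>

// Read exactly Size bytes from FD into Dst, retrying transient failures.
static std::error_code readAll(int FD, char *Dst, size_t Size) {
  size_t Completed = 0;
  while (Completed < Size) {
    ssize_t Read = ::read(FD, Dst + Completed, Size - Completed);
    if (Read < 0 && (errno == EAGAIN || errno == EINTR))
      continue; // interrupted or would block: retry
    if (Read <= 0) // hard error, or unexpected end-of-file
      return std::error_code(errno ? errno : EIO, std::generic_category());
    Completed += static_cast<size_t>(Read);
  }
  return {};
}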
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_ORC_SHARED_ORCRTBRIDGE_H +#define LLVM_EXECUTIONENGINE_ORC_SHARED_ORCRTBRIDGE_H + +#include "llvm/ADT/StringMap.h" +#include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h" +#include "llvm/ExecutionEngine/Orc/Shared/SimpleRemoteEPCUtils.h" +#include "llvm/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.h" + +namespace llvm { +namespace orc { +namespace rt { + +extern const char *SimpleExecutorDylibManagerInstanceName; +extern const char *SimpleExecutorDylibManagerOpenWrapperName; +extern const char *SimpleExecutorDylibManagerLookupWrapperName; + +extern const char *SimpleExecutorMemoryManagerInstanceName; +extern const char *SimpleExecutorMemoryManagerReserveWrapperName; +extern const char *SimpleExecutorMemoryManagerFinalizeWrapperName; +extern const char *SimpleExecutorMemoryManagerDeallocateWrapperName; + +extern const char *MemoryWriteUInt8sWrapperName; +extern const char *MemoryWriteUInt16sWrapperName; +extern const char *MemoryWriteUInt32sWrapperName; +extern const char *MemoryWriteUInt64sWrapperName; +extern const char *MemoryWriteBuffersWrapperName; + +extern const char *RegisterEHFrameSectionCustomDirectWrapperName; +extern const char *DeregisterEHFrameSectionCustomDirectWrapperName; + +extern const char *RunAsMainWrapperName; + +using SPSSimpleExecutorDylibManagerOpenSignature = + shared::SPSExpected(shared::SPSExecutorAddr, shared::SPSString, + uint64_t); + +using SPSSimpleExecutorDylibManagerLookupSignature = + shared::SPSExpected>( + shared::SPSExecutorAddr, uint64_t, shared::SPSRemoteSymbolLookupSet); + +using SPSSimpleExecutorMemoryManagerReserveSignature = + shared::SPSExpected(shared::SPSExecutorAddr, + uint64_t); +using SPSSimpleExecutorMemoryManagerFinalizeSignature = + shared::SPSError(shared::SPSExecutorAddr, shared::SPSFinalizeRequest); +using SPSSimpleExecutorMemoryManagerDeallocateSignature = shared::SPSError( + shared::SPSExecutorAddr, shared::SPSSequence); + +using SPSRunAsMainSignature = int64_t(shared::SPSExecutorAddr, + shared::SPSSequence); + +} // end namespace rt +} // end namespace orc +} // end namespace llvm + +#endif // LLVM_EXECUTIONENGINE_ORC_SHARED_ORCRTBRIDGE_H diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Shared/RPCUtils.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/RPCUtils.h deleted file mode 100644 index 1ff47ce42758..000000000000 --- a/llvm/include/llvm/ExecutionEngine/Orc/Shared/RPCUtils.h +++ /dev/null @@ -1,1659 +0,0 @@ -//===- RPCUtils.h - Utilities for building RPC APIs -------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// Utilities to support construction of simple RPC APIs. -// -// The RPC utilities aim for ease of use (minimal conceptual overhead) for C++ -// programmers, high performance, low memory overhead, and efficient use of the -// communications channel. 
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_EXECUTIONENGINE_ORC_SHARED_RPCUTILS_H -#define LLVM_EXECUTIONENGINE_ORC_SHARED_RPCUTILS_H - -#include -#include -#include - -#include "llvm/ADT/STLExtras.h" -#include "llvm/ExecutionEngine/Orc/Shared/OrcError.h" -#include "llvm/ExecutionEngine/Orc/Shared/Serialization.h" -#include "llvm/Support/MSVCErrorWorkarounds.h" - -#include - -namespace llvm { -namespace orc { -namespace shared { - -/// Base class of all fatal RPC errors (those that necessarily result in the -/// termination of the RPC session). -class RPCFatalError : public ErrorInfo { -public: - static char ID; -}; - -/// RPCConnectionClosed is returned from RPC operations if the RPC connection -/// has already been closed due to either an error or graceful disconnection. -class ConnectionClosed : public ErrorInfo { -public: - static char ID; - std::error_code convertToErrorCode() const override; - void log(raw_ostream &OS) const override; -}; - -/// BadFunctionCall is returned from handleOne when the remote makes a call with -/// an unrecognized function id. -/// -/// This error is fatal because Orc RPC needs to know how to parse a function -/// call to know where the next call starts, and if it doesn't recognize the -/// function id it cannot parse the call. -template -class BadFunctionCall - : public ErrorInfo, RPCFatalError> { -public: - static char ID; - - BadFunctionCall(FnIdT FnId, SeqNoT SeqNo) - : FnId(std::move(FnId)), SeqNo(std::move(SeqNo)) {} - - std::error_code convertToErrorCode() const override { - return orcError(OrcErrorCode::UnexpectedRPCCall); - } - - void log(raw_ostream &OS) const override { - OS << "Call to invalid RPC function id '" << FnId - << "' with " - "sequence number " - << SeqNo; - } - -private: - FnIdT FnId; - SeqNoT SeqNo; -}; - -template -char BadFunctionCall::ID = 0; - -/// InvalidSequenceNumberForResponse is returned from handleOne when a response -/// call arrives with a sequence number that doesn't correspond to any in-flight -/// function call. -/// -/// This error is fatal because Orc RPC needs to know how to parse the rest of -/// the response call to know where the next call starts, and if it doesn't have -/// a result parser for this sequence number it can't do that. -template -class InvalidSequenceNumberForResponse - : public ErrorInfo, - RPCFatalError> { -public: - static char ID; - - InvalidSequenceNumberForResponse(SeqNoT SeqNo) : SeqNo(std::move(SeqNo)) {} - - std::error_code convertToErrorCode() const override { - return orcError(OrcErrorCode::UnexpectedRPCCall); - }; - - void log(raw_ostream &OS) const override { - OS << "Response has unknown sequence number " << SeqNo; - } - -private: - SeqNoT SeqNo; -}; - -template -char InvalidSequenceNumberForResponse::ID = 0; - -/// This non-fatal error will be passed to asynchronous result handlers in place -/// of a result if the connection goes down before a result returns, or if the -/// function to be called cannot be negotiated with the remote. -class ResponseAbandoned : public ErrorInfo { -public: - static char ID; - - std::error_code convertToErrorCode() const override; - void log(raw_ostream &OS) const override; -}; - -/// This error is returned if the remote does not have a handler installed for -/// the given RPC function. 
-class CouldNotNegotiate : public ErrorInfo<CouldNotNegotiate> {
-public:
-  static char ID;
-
-  CouldNotNegotiate(std::string Signature);
-  std::error_code convertToErrorCode() const override;
-  void log(raw_ostream &OS) const override;
-  const std::string &getSignature() const { return Signature; }
-
-private:
-  std::string Signature;
-};
-
-template <typename DerivedFunc, typename FnT> class RPCFunction;
-
-// RPC Function class.
-// DerivedFunc should be a user defined class with a static 'getName()' method
-// returning a const char* representing the function's name.
-template <typename DerivedFunc, typename RetT, typename... ArgTs>
-class RPCFunction<DerivedFunc, RetT(ArgTs...)> {
-public:
-  /// User defined function type.
-  using Type = RetT(ArgTs...);
-
-  /// Return type.
-  using ReturnType = RetT;
-
-  /// Returns the full function prototype as a string.
-  static const char *getPrototype() {
-    static std::string Name = [] {
-      std::string Name;
-      raw_string_ostream(Name)
-          << SerializationTypeName<RetT>::getName() << " "
-          << DerivedFunc::getName() << "("
-          << SerializationTypeNameSequence<ArgTs...>() << ")";
-      return Name;
-    }();
-    return Name.data();
-  }
-};
-
-/// Allocates RPC function ids during autonegotiation.
-/// Specializations of this class must provide four members:
-///
-/// static T getInvalidId():
-///   Should return a reserved id that will be used to represent missing
-///   functions during autonegotiation.
-///
-/// static T getResponseId():
-///   Should return a reserved id that will be used to send function responses
-///   (return values).
-///
-/// static T getNegotiateId():
-///   Should return a reserved id for the negotiate function, which will be used
-///   to negotiate ids for user defined functions.
-///
-/// template <typename Func> T allocate():
-///   Allocate a unique id for function Func.
-template <typename T, typename = void> class RPCFunctionIdAllocator;
-
-/// This specialization of RPCFunctionIdAllocator provides a default
-/// implementation for integral types.
-template <typename T>
-class RPCFunctionIdAllocator<T, std::enable_if_t<std::is_integral<T>::value>> {
-public:
-  static T getInvalidId() { return T(0); }
-  static T getResponseId() { return T(1); }
-  static T getNegotiateId() { return T(2); }
-
-  template <typename Func> T allocate() { return NextId++; }
-
-private:
-  T NextId = 3;
-};
-
-namespace detail {
-
-/// Provides a typedef for a tuple containing the decayed argument types.
-template <typename T> class RPCFunctionArgsTuple;
-
-template <typename RetT, typename... ArgTs>
-class RPCFunctionArgsTuple<RetT(ArgTs...)> {
-public:
-  using Type = std::tuple<std::decay_t<std::remove_reference_t<ArgTs>>...>;
-};
-
-// ResultTraits provides typedefs and utilities specific to the return type
-// of functions.
-template <typename RetT> class ResultTraits {
-public:
-  // The return type wrapped in llvm::Expected.
-  using ErrorReturnType = Expected<RetT>;
-
-#ifdef _MSC_VER
-  // The ErrorReturnType wrapped in a std::promise.
-  using ReturnPromiseType = std::promise<MSVCPExpected<RetT>>;
-
-  // The ErrorReturnType wrapped in a std::future.
-  using ReturnFutureType = std::future<MSVCPExpected<RetT>>;
-#else
-  // The ErrorReturnType wrapped in a std::promise.
-  using ReturnPromiseType = std::promise<ErrorReturnType>;
-
-  // The ErrorReturnType wrapped in a std::future.
-  using ReturnFutureType = std::future<ErrorReturnType>;
-#endif
-
-  // Create a 'blank' value of the ErrorReturnType, ready and safe to
-  // overwrite.
-  static ErrorReturnType createBlankErrorReturnValue() {
-    return ErrorReturnType(RetT());
-  }
-
-  // Consume an abandoned ErrorReturnType.
-  static void consumeAbandoned(ErrorReturnType RetOrErr) {
-    consumeError(RetOrErr.takeError());
-  }
-
-  static ErrorReturnType returnError(Error Err) { return std::move(Err); }
-};
-
-// ResultTraits specialization for void functions.
-template <> class ResultTraits<void> {
-public:
-  // For void functions, ErrorReturnType is llvm::Error.
-  using ErrorReturnType = Error;
-
-#ifdef _MSC_VER
-  // The ErrorReturnType wrapped in a std::promise.
-  using ReturnPromiseType = std::promise<MSVCPError>;
-
-  // The ErrorReturnType wrapped in a std::future.
-  using ReturnFutureType = std::future<MSVCPError>;
-#else
-  // The ErrorReturnType wrapped in a std::promise.
-  using ReturnPromiseType = std::promise<ErrorReturnType>;
-
-  // The ErrorReturnType wrapped in a std::future.
-  using ReturnFutureType = std::future<ErrorReturnType>;
-#endif
-
-  // Create a 'blank' value of the ErrorReturnType, ready and safe to
-  // overwrite.
-  static ErrorReturnType createBlankErrorReturnValue() {
-    return ErrorReturnType::success();
-  }
-
-  // Consume an abandoned ErrorReturnType.
-  static void consumeAbandoned(ErrorReturnType Err) {
-    consumeError(std::move(Err));
-  }
-
-  static ErrorReturnType returnError(Error Err) { return Err; }
-};
-
-// ResultTraits<Error> is equivalent to ResultTraits<void>. This allows
-// handlers for void RPC functions to return either void (in which case they
-// implicitly succeed) or Error (in which case their error return is
-// propagated). See usage in HandlerTraits::runHandlerHelper.
-template <> class ResultTraits<Error> : public ResultTraits<void> {};
-
-// ResultTraits<Expected<T>> is equivalent to ResultTraits<T>. This allows
-// handlers for RPC functions returning a T to return either a T (in which
-// case they implicitly succeed) or Expected<T> (in which case their error
-// return is propagated). See usage in HandlerTraits::runHandlerHelper.
-template <typename RetT>
-class ResultTraits<Expected<RetT>> : public ResultTraits<RetT> {};
-
-// Determines whether an RPC function's defined error return type supports
-// error return value.
-template <typename T> class SupportsErrorReturn {
-public:
-  static const bool value = false;
-};
-
-template <> class SupportsErrorReturn<Error> {
-public:
-  static const bool value = true;
-};
-
-template <typename T> class SupportsErrorReturn<Expected<T>> {
-public:
-  static const bool value = true;
-};
-
-// RespondHelper packages return values based on whether or not the declared
-// RPC function return type supports error returns.
-template <bool FuncSupportsErrorReturn> class RespondHelper;
-
-// RespondHelper specialization for functions that support error returns.
-template <> class RespondHelper<true> {
-public:
-  // Send Expected<T>.
-  template <typename WireRetT, typename HandlerRetT, typename ChannelT,
-            typename FunctionIdT, typename SequenceNumberT>
-  static Error sendResult(ChannelT &C, const FunctionIdT &ResponseId,
-                          SequenceNumberT SeqNo,
-                          Expected<HandlerRetT> ResultOrErr) {
-    if (!ResultOrErr && ResultOrErr.template errorIsA<RPCFatalError>())
-      return ResultOrErr.takeError();
-
-    // Open the response message.
-    if (auto Err = C.startSendMessage(ResponseId, SeqNo))
-      return Err;
-
-    // Serialize the result.
-    if (auto Err =
-            SerializationTraits<ChannelT, WireRetT, Expected<HandlerRetT>>::
-                serialize(C, std::move(ResultOrErr)))
-      return Err;
-
-    // Close the response message.
-    if (auto Err = C.endSendMessage())
-      return Err;
-    return C.send();
-  }
-
-  template <typename ChannelT, typename FunctionIdT, typename SequenceNumberT>
-  static Error sendResult(ChannelT &C, const FunctionIdT &ResponseId,
-                          SequenceNumberT SeqNo, Error Err) {
-    if (Err && Err.isA<RPCFatalError>())
-      return Err;
-    if (auto Err2 = C.startSendMessage(ResponseId, SeqNo))
-      return Err2;
-    if (auto Err2 = serializeSeq(C, std::move(Err)))
-      return Err2;
-    if (auto Err2 = C.endSendMessage())
-      return Err2;
-    return C.send();
-  }
-};
-
-// RespondHelper specialization for functions that do not support error returns.
-template <> class RespondHelper<false> {
-public:
-  template <typename WireRetT, typename HandlerRetT, typename ChannelT,
-            typename FunctionIdT, typename SequenceNumberT>
-  static Error sendResult(ChannelT &C, const FunctionIdT &ResponseId,
-                          SequenceNumberT SeqNo,
-                          Expected<HandlerRetT> ResultOrErr) {
-    if (auto Err = ResultOrErr.takeError())
-      return Err;
-
-    // Open the response message.
- if (auto Err = C.startSendMessage(ResponseId, SeqNo)) - return Err; - - // Serialize the result. - if (auto Err = - SerializationTraits::serialize( - C, *ResultOrErr)) - return Err; - - // End the response message. - if (auto Err = C.endSendMessage()) - return Err; - - return C.send(); - } - - template - static Error sendResult(ChannelT &C, const FunctionIdT &ResponseId, - SequenceNumberT SeqNo, Error Err) { - if (Err) - return Err; - if (auto Err2 = C.startSendMessage(ResponseId, SeqNo)) - return Err2; - if (auto Err2 = C.endSendMessage()) - return Err2; - return C.send(); - } -}; - -// Send a response of the given wire return type (WireRetT) over the -// channel, with the given sequence number. -template -Error respond(ChannelT &C, const FunctionIdT &ResponseId, SequenceNumberT SeqNo, - Expected ResultOrErr) { - return RespondHelper::value>:: - template sendResult(C, ResponseId, SeqNo, - std::move(ResultOrErr)); -} - -// Send an empty response message on the given channel to indicate that -// the handler ran. -template -Error respond(ChannelT &C, const FunctionIdT &ResponseId, SequenceNumberT SeqNo, - Error Err) { - return RespondHelper::value>::sendResult( - C, ResponseId, SeqNo, std::move(Err)); -} - -// Converts a given type to the equivalent error return type. -template class WrappedHandlerReturn { -public: - using Type = Expected; -}; - -template class WrappedHandlerReturn> { -public: - using Type = Expected; -}; - -template <> class WrappedHandlerReturn { -public: - using Type = Error; -}; - -template <> class WrappedHandlerReturn { -public: - using Type = Error; -}; - -template <> class WrappedHandlerReturn { -public: - using Type = Error; -}; - -// Traits class that strips the response function from the list of handler -// arguments. -template class AsyncHandlerTraits; - -template -class AsyncHandlerTraits)>, - ArgTs...)> { -public: - using Type = Error(ArgTs...); - using ResultType = Expected; -}; - -template -class AsyncHandlerTraits, ArgTs...)> { -public: - using Type = Error(ArgTs...); - using ResultType = Error; -}; - -template -class AsyncHandlerTraits, ArgTs...)> { -public: - using Type = Error(ArgTs...); - using ResultType = Error; -}; - -template -class AsyncHandlerTraits, ArgTs...)> { -public: - using Type = Error(ArgTs...); - using ResultType = Error; -}; - -template -class AsyncHandlerTraits - : public AsyncHandlerTraits, - ArgTs...)> {}; - -// This template class provides utilities related to RPC function handlers. -// The base case applies to non-function types (the template class is -// specialized for function types) and inherits from the appropriate -// speciilization for the given non-function type's call operator. -template -class HandlerTraits - : public HandlerTraits< - decltype(&std::remove_reference::type::operator())> {}; - -// Traits for handlers with a given function type. -template -class HandlerTraits { -public: - // Function type of the handler. - using Type = RetT(ArgTs...); - - // Return type of the handler. - using ReturnType = RetT; - - // Call the given handler with the given arguments. - template - static typename WrappedHandlerReturn::Type - unpackAndRun(HandlerT &Handler, std::tuple &Args) { - return unpackAndRunHelper(Handler, Args, - std::index_sequence_for()); - } - - // Call the given handler with the given arguments. 
- template - static Error unpackAndRunAsync(HandlerT &Handler, ResponderT &Responder, - std::tuple &Args) { - return unpackAndRunAsyncHelper(Handler, Responder, Args, - std::index_sequence_for()); - } - - // Call the given handler with the given arguments. - template - static std::enable_if_t< - std::is_void::ReturnType>::value, Error> - run(HandlerT &Handler, ArgTs &&...Args) { - Handler(std::move(Args)...); - return Error::success(); - } - - template - static std::enable_if_t< - !std::is_void::ReturnType>::value, - typename HandlerTraits::ReturnType> - run(HandlerT &Handler, TArgTs... Args) { - return Handler(std::move(Args)...); - } - - // Serialize arguments to the channel. - template - static Error serializeArgs(ChannelT &C, const CArgTs... CArgs) { - return SequenceSerialization::serialize(C, CArgs...); - } - - // Deserialize arguments from the channel. - template - static Error deserializeArgs(ChannelT &C, std::tuple &Args) { - return deserializeArgsHelper(C, Args, std::index_sequence_for()); - } - -private: - template - static Error deserializeArgsHelper(ChannelT &C, std::tuple &Args, - std::index_sequence _) { - return SequenceSerialization::deserialize( - C, std::get(Args)...); - } - - template - static typename WrappedHandlerReturn< - typename HandlerTraits::ReturnType>::Type - unpackAndRunHelper(HandlerT &Handler, ArgTuple &Args, - std::index_sequence) { - return run(Handler, std::move(std::get(Args))...); - } - - template - static typename WrappedHandlerReturn< - typename HandlerTraits::ReturnType>::Type - unpackAndRunAsyncHelper(HandlerT &Handler, ResponderT &Responder, - ArgTuple &Args, std::index_sequence) { - return run(Handler, Responder, std::move(std::get(Args))...); - } -}; - -// Handler traits for free functions. -template -class HandlerTraits : public HandlerTraits { -}; - -// Handler traits for class methods (especially call operators for lambdas). -template -class HandlerTraits - : public HandlerTraits {}; - -// Handler traits for const class methods (especially call operators for -// lambdas). -template -class HandlerTraits - : public HandlerTraits {}; - -// Utility to peel the Expected wrapper off a response handler error type. -template class ResponseHandlerArg; - -template class ResponseHandlerArg)> { -public: - using ArgType = Expected; - using UnwrappedArgType = ArgT; -}; - -template -class ResponseHandlerArg)> { -public: - using ArgType = Expected; - using UnwrappedArgType = ArgT; -}; - -template <> class ResponseHandlerArg { -public: - using ArgType = Error; -}; - -template <> class ResponseHandlerArg { -public: - using ArgType = Error; -}; - -// ResponseHandler represents a handler for a not-yet-received function call -// result. -template class ResponseHandler { -public: - virtual ~ResponseHandler() {} - - // Reads the function result off the wire and acts on it. The meaning of - // "act" will depend on how this method is implemented in any given - // ResponseHandler subclass but could, for example, mean running a - // user-specified handler or setting a promise value. - virtual Error handleResponse(ChannelT &C) = 0; - - // Abandons this outstanding result. - virtual void abandon() = 0; - - // Create an error instance representing an abandoned response. - static Error createAbandonedResponseError() { - return make_error(); - } -}; - -// ResponseHandler subclass for RPC functions with non-void returns. 
-template -class ResponseHandlerImpl : public ResponseHandler { -public: - ResponseHandlerImpl(HandlerT Handler) : Handler(std::move(Handler)) {} - - // Handle the result by deserializing it from the channel then passing it - // to the user defined handler. - Error handleResponse(ChannelT &C) override { - using UnwrappedArgType = typename ResponseHandlerArg< - typename HandlerTraits::Type>::UnwrappedArgType; - UnwrappedArgType Result; - if (auto Err = - SerializationTraits::deserialize(C, Result)) - return Err; - if (auto Err = C.endReceiveMessage()) - return Err; - return Handler(std::move(Result)); - } - - // Abandon this response by calling the handler with an 'abandoned response' - // error. - void abandon() override { - if (auto Err = Handler(this->createAbandonedResponseError())) { - // Handlers should not fail when passed an abandoned response error. - report_fatal_error(std::move(Err)); - } - } - -private: - HandlerT Handler; -}; - -// ResponseHandler subclass for RPC functions with void returns. -template -class ResponseHandlerImpl - : public ResponseHandler { -public: - ResponseHandlerImpl(HandlerT Handler) : Handler(std::move(Handler)) {} - - // Handle the result (no actual value, just a notification that the function - // has completed on the remote end) by calling the user-defined handler with - // Error::success(). - Error handleResponse(ChannelT &C) override { - if (auto Err = C.endReceiveMessage()) - return Err; - return Handler(Error::success()); - } - - // Abandon this response by calling the handler with an 'abandoned response' - // error. - void abandon() override { - if (auto Err = Handler(this->createAbandonedResponseError())) { - // Handlers should not fail when passed an abandoned response error. - report_fatal_error(std::move(Err)); - } - } - -private: - HandlerT Handler; -}; - -template -class ResponseHandlerImpl, HandlerT> - : public ResponseHandler { -public: - ResponseHandlerImpl(HandlerT Handler) : Handler(std::move(Handler)) {} - - // Handle the result by deserializing it from the channel then passing it - // to the user defined handler. - Error handleResponse(ChannelT &C) override { - using HandlerArgType = typename ResponseHandlerArg< - typename HandlerTraits::Type>::ArgType; - HandlerArgType Result((typename HandlerArgType::value_type())); - - if (auto Err = SerializationTraits, - HandlerArgType>::deserialize(C, Result)) - return Err; - if (auto Err = C.endReceiveMessage()) - return Err; - return Handler(std::move(Result)); - } - - // Abandon this response by calling the handler with an 'abandoned response' - // error. - void abandon() override { - if (auto Err = Handler(this->createAbandonedResponseError())) { - // Handlers should not fail when passed an abandoned response error. - report_fatal_error(std::move(Err)); - } - } - -private: - HandlerT Handler; -}; - -template -class ResponseHandlerImpl - : public ResponseHandler { -public: - ResponseHandlerImpl(HandlerT Handler) : Handler(std::move(Handler)) {} - - // Handle the result by deserializing it from the channel then passing it - // to the user defined handler. - Error handleResponse(ChannelT &C) override { - Error Result = Error::success(); - if (auto Err = SerializationTraits::deserialize( - C, Result)) { - consumeError(std::move(Result)); - return Err; - } - if (auto Err = C.endReceiveMessage()) { - consumeError(std::move(Result)); - return Err; - } - return Handler(std::move(Result)); - } - - // Abandon this response by calling the handler with an 'abandoned response' - // error. 
- void abandon() override { - if (auto Err = Handler(this->createAbandonedResponseError())) { - // Handlers should not fail when passed an abandoned response error. - report_fatal_error(std::move(Err)); - } - } - -private: - HandlerT Handler; -}; - -// Create a ResponseHandler from a given user handler. -template -std::unique_ptr> createResponseHandler(HandlerT H) { - return std::make_unique>( - std::move(H)); -} - -// Helper for wrapping member functions up as functors. This is useful for -// installing methods as result handlers. -template -class MemberFnWrapper { -public: - using MethodT = RetT (ClassT::*)(ArgTs...); - MemberFnWrapper(ClassT &Instance, MethodT Method) - : Instance(Instance), Method(Method) {} - RetT operator()(ArgTs &&...Args) { - return (Instance.*Method)(std::move(Args)...); - } - -private: - ClassT &Instance; - MethodT Method; -}; - -// Helper that provides a Functor for deserializing arguments. -template class ReadArgs { -public: - Error operator()() { return Error::success(); } -}; - -template -class ReadArgs : public ReadArgs { -public: - ReadArgs(ArgT &Arg, ArgTs &...Args) : ReadArgs(Args...), Arg(Arg) {} - - Error operator()(ArgT &ArgVal, ArgTs &...ArgVals) { - this->Arg = std::move(ArgVal); - return ReadArgs::operator()(ArgVals...); - } - -private: - ArgT &Arg; -}; - -// Manage sequence numbers. -template class SequenceNumberManager { -public: - // Reset, making all sequence numbers available. - void reset() { - std::lock_guard Lock(SeqNoLock); - NextSequenceNumber = 0; - FreeSequenceNumbers.clear(); - } - - // Get the next available sequence number. Will re-use numbers that have - // been released. - SequenceNumberT getSequenceNumber() { - std::lock_guard Lock(SeqNoLock); - if (FreeSequenceNumbers.empty()) - return NextSequenceNumber++; - auto SequenceNumber = FreeSequenceNumbers.back(); - FreeSequenceNumbers.pop_back(); - return SequenceNumber; - } - - // Release a sequence number, making it available for re-use. - void releaseSequenceNumber(SequenceNumberT SequenceNumber) { - std::lock_guard Lock(SeqNoLock); - FreeSequenceNumbers.push_back(SequenceNumber); - } - -private: - std::mutex SeqNoLock; - SequenceNumberT NextSequenceNumber = 0; - std::vector FreeSequenceNumbers; -}; - -// Checks that predicate P holds for each corresponding pair of type arguments -// from T1 and T2 tuple. -template